Create separate nest() for multiple columns in a tibble

Reprex below.

Is there a more elegant way to achieve the output?

Looking for enlightenment from #tidyverse zen masters :slight_smile:

library(tidyverse)

# Example input: 15 rows for a single document ("obs_85"). Each row carries a
# value in `term`, in `term_noun`, in both, or in neither (NA).
data <- tibble::tribble(
  ~doc_id,  ~term,             ~term_noun,
  "obs_85", NA,                NA,
  "obs_85", "open_top_tank",   NA,
  "obs_85", NA,                NA,
  "obs_85", NA,                "tank",
  "obs_85", NA,                NA,
  "obs_85", NA,                NA,
  "obs_85", "butterfly_valve", "butterfly",
  "obs_85", NA,                "valve",
  "obs_85", NA,                NA,
  "obs_85", NA,                NA,
  "obs_85", NA,                NA,
  "obs_85", NA,                NA,
  "obs_85", NA,                NA,
  "obs_85", NA,                "finding",
  "obs_85", NA,                NA
)


# Nest the non-missing values of each column per `doc_id`, join the two nested
# tables, and flatten the result into a named list holding one character
# vector per document.
data %>%
  dplyr::select(doc_id, term) %>%
  tidyr::drop_na(term) %>%
  tidyr::nest(term, .key = "term") %>%
  dplyr::left_join(
    # `term_noun` is renamed to `term` so both nested tibbles share a single
    # column and `bind_rows()` stacks them into one column.
    data %>%
      dplyr::select(doc_id, term = term_noun) %>%
      tidyr::drop_na(term) %>%
      tidyr::nest(term, .key = "term_noun")
  ) %>%
  dplyr::mutate(
    data = purrr::map2(
      term, term_noun,
      function(terms, nouns) {
        as.character(unlist(dplyr::bind_rows(terms, nouns)))
      }
    )
  ) %>%
  dplyr::select(doc_id, data) %>%
  tibble::deframe()
#> Joining, by = "doc_id"
#> $obs_85
#> [1] "open_top_tank"   "butterfly_valve" "tank"            "butterfly"      
#> [5] "valve"           "finding"

Created on 2019-01-14 by the reprex package (v0.2.1)

Is this the kind of result you want?

Data
library(tidyverse)

# Same example input as in the question: 15 rows for document "obs_85", with
# NA wherever `term` or `term_noun` has no value.
data <- tibble::tribble(
  ~doc_id,  ~term,             ~term_noun,
  "obs_85", NA,                NA,
  "obs_85", "open_top_tank",   NA,
  "obs_85", NA,                NA,
  "obs_85", NA,                "tank",
  "obs_85", NA,                NA,
  "obs_85", NA,                NA,
  "obs_85", "butterfly_valve", "butterfly",
  "obs_85", NA,                "valve",
  "obs_85", NA,                NA,
  "obs_85", NA,                NA,
  "obs_85", NA,                NA,
  "obs_85", NA,                NA,
  "obs_85", NA,                NA,
  "obs_85", NA,                "finding",
  "obs_85", NA,                NA
)


# Reshape to long form: `term` records the source column and `value` holds its
# content; rows whose `value` is missing are dropped.
res <- data %>%
  tidyr::gather(key = "term", value = "value", -doc_id) %>%
  tidyr::drop_na(value)

res
#> # A tibble: 6 x 3
#>   doc_id term      value          
#>   <chr>  <chr>     <chr>          
#> 1 obs_85 term      open_top_tank  
#> 2 obs_85 term      butterfly_valve
#> 3 obs_85 term_noun tank           
#> 4 obs_85 term_noun butterfly      
#> 5 obs_85 term_noun valve          
#> 6 obs_85 term_noun finding

# Extract the stacked values as a plain character vector.
res %>%
  dplyr::pull(value)
#> [1] "open_top_tank"   "butterfly_valve" "tank"            "butterfly"      
#> [5] "valve"           "finding"

Created on 2019-01-14 by the reprex package (v0.2.1)

4 Likes

@cderv to the rescue again! Thanks for the prompt assistance and the tip :smiley:

This is the final version as I wanted a list:

library(tidyverse)

# Example input: 15 rows for document "obs_85"; `term` and `term_noun` are NA
# on rows where nothing was extracted for that column.
data <- tibble::tribble(
  ~doc_id,  ~term,             ~term_noun,
  "obs_85", NA,                NA,
  "obs_85", "open_top_tank",   NA,
  "obs_85", NA,                NA,
  "obs_85", NA,                "tank",
  "obs_85", NA,                NA,
  "obs_85", NA,                NA,
  "obs_85", "butterfly_valve", "butterfly",
  "obs_85", NA,                "valve",
  "obs_85", NA,                NA,
  "obs_85", NA,                NA,
  "obs_85", NA,                NA,
  "obs_85", NA,                NA,
  "obs_85", NA,                NA,
  "obs_85", NA,                "finding",
  "obs_85", NA,                NA
)

# Stack both term columns, keep the non-missing values, and collapse them into
# a named list with one character vector per `doc_id`.
data %>%
  tidyr::gather(key = "term", value = "value", -doc_id) %>%
  tidyr::drop_na(value) %>%
  dplyr::select(doc_id, value) %>%
  tidyr::nest(value) %>%
  dplyr::mutate(
    # Each nested tibble has a single `value` column; flatten it to a vector.
    data = purrr::map(data, function(nested) as.character(unlist(nested)))
  ) %>%
  tibble::deframe()
#> $obs_85
#> [1] "open_top_tank"   "butterfly_valve" "tank"            "butterfly"      
#> [5] "valve"           "finding"

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.