Reprex below.
Is there a more elegant way to achieve the output?
Looking for enlightenment from #tidyverse zen masters
library(tidyverse)
tibble::tribble(
~doc_id, ~term, ~term_noun,
"obs_85", NA, NA,
"obs_85", "open_top_tank", NA,
"obs_85", NA, NA,
"obs_85", NA, "tank",
"obs_85", NA, NA,
"obs_85", NA, NA,
"obs_85", "butterfly_valve", "butterfly",
"obs_85", NA, "valve",
"obs_85", NA, NA,
"obs_85", NA, NA,
"obs_85", NA, NA,
"obs_85", NA, NA,
"obs_85", NA, NA,
"obs_85", NA, "finding",
"obs_85", NA, NA
) -> data
data %>%
select(doc_id, term) %>%
filter(!is.na(term)) %>%
tidyr::nest(term, .key = 'term') %>%
left_join( data %>%
select(doc_id, term = term_noun) %>%
filter(!is.na(term)) %>%
tidyr::nest(term, .key = 'term_noun')) %>%
mutate(data = map2(term, term_noun, ~bind_rows(.x, .y))) %>%
mutate(data = purrr::map(data, ~ as.character(unlist(.x)))) %>%
select(doc_id, data) %>%
deframe()
#> Joining, by = "doc_id"
#> $obs_85
#> [1] "open_top_tank" "butterfly_valve" "tank" "butterfly"
#> [5] "valve" "finding"
Created on 2019-01-14 by the reprex package (v0.2.1)