Here is my solution:
library(tidyverse)
# Example input with columns ID, D and AE (20 rows, grouped by ID below).
sample_df <- data.frame(
  ID = c(
    1, 1, 1, 1,
    2, 2, 2, 2, 2,
    3, 3, 3, 3,
    4, 4, 4, 4,
    5, 5, 5
  ),
  D = c(
    "a", "b", "c", "d",
    "e", "f", "g", "y", "z",
    "a", "b", "g", "y",
    "d", "e", "f", "g",
    "b", "a", "y"
  ),
  AE = c(
    "m", "h", "j", "k",
    "m", "h", "j", "k", "m",
    "j", "m", "h", "l",
    "j", "k", "m", "h",
    "m", "o", "s"
  ),
  stringsAsFactors = FALSE
)
# Append "*" to every AE value so AE tokens are visually distinct from D tokens.
sample_df$AE <- paste0(sample_df$AE, "*")

# Distinct D and AE tokens, in order of first appearance.
dtoks <- unique(sample_df$D)
aetoks <- unique(sample_df$AE)

# Build every candidate triplet: each unordered pair of D tokens crossed with
# each AE token. Wrapping an assignment in parentheses prints the result.
# `drugpairs`: one row per pair, `name` = pair index, `value` = list-column
# holding the two tokens.
(drugpairs <- enframe(combn(dtoks, m = 2, simplify = FALSE)))
# `triplets_0`: all (pair index, AE token) combinations as Var1 / Var2.
(triplets_0 <- expand.grid(drugpairs$name, aetoks))
# Attach the actual pair to each combination and give the columns
# descriptive names.
(triplets <- drugpairs %>%
  left_join(triplets_0, by = c("name" = "Var1")) %>%
  rename(drugpair = value, AE = Var2))
# Decide whether a single ID matches a single candidate triplet.
#
# IDnum:   the ID value to look up in the global `sample_df`.
# triplet: one row of the triplets table; its `drugpair` list-column holds
#          the two D tokens and its `AE` column holds the AE token.
#
# Returns TRUE only when (1) both D tokens of the pair occur among the ID's
# rows and (2) at least one row of that ID carrying the triplet's AE has a D
# token belonging to the pair; otherwise FALSE.
triplet_in_id <- function(IDnum, triplet) {
  rows_for_id <- filter(sample_df, ID == IDnum)
  pair_members <- unlist(triplet$drugpair)
  target_ae <- triplet$AE

  # Condition 1: both members of the pair were recorded for this ID.
  has_both_drugs <- all(
    c(pair_members[[1]], pair_members[[2]]) %in% rows_for_id$D
  )

  # Condition 2: the triplet's AE sits on a row whose D is one of the pair.
  rows_with_ae <- filter(rows_for_id, AE == target_ae)
  if (nrow(rows_with_ae) == 0) {
    # The AE never occurs for this ID, so the triplet cannot match.
    return(FALSE)
  }
  ae_on_paired_drug <- any(rows_with_ae[["D"]] %in% pair_members)

  # Both conditions must hold.
  has_both_drugs && ae_on_paired_drug
}
# Test one ID against every candidate triplet.
#
# IDnum: the ID value forwarded to triplet_in_id().
#
# Returns a logical vector with one element per row of the global `triplets`
# table, in row order; element i is triplet_in_id(IDnum, triplets[i, ]).
# When `triplets` has no rows the result is logical(0).
scan_id_for_all_triplets <- function(IDnum) {
  # seq_len() rather than 1:nrow(): for an empty table 1:0 would iterate over
  # c(1, 0) and index rows that do not exist.
  map_lgl(
    seq_len(nrow(triplets)),
    function(row_idx) triplet_in_id(IDnum, triplets[row_idx, ])
  )
}
# Scan every distinct ID. Each ID yields a one-column tibble of logicals (one
# entry per row of `triplets`) whose column is named after the ID; the columns
# are then bound side by side.
(ids_scanned_for_trips <- map_dfc(
  unique(sample_df$ID),
  function(id) {
    enframe(scan_id_for_all_triplets(id), name = NULL, value = id)
  }
))

# Number of IDs in which each triplet was found (TRUE counts as 1).
ids_scanned_for_trips[["sumIDhits"]] <- rowSums(ids_scanned_for_trips)

# Put the triplet definitions next to their per-ID hits.
(collated_df <- triplets %>% bind_cols(ids_scanned_for_trips))

# Keep only the triplets found in more than two IDs.
(trips_of_interest <- collated_df %>% filter(sumIDhits > 2))
# Printed result:
# # A tibble: 1 x 9
# name drugpair AE `1` `2` `3` `4` `5` sumIDhits
# <int> <list> <fct> <lgl> <lgl> <lgl> <lgl> <lgl> <dbl>
# 1 1 <chr [2]> m* TRUE FALSE TRUE FALSE TRUE 3

# Expand the list-column so each member of the pair gets its own row.
unnest(trips_of_interest, cols = c(drugpair))
# Printed result:
# A tibble: 2 x 9
# name drugpair AE `1` `2` `3` `4` `5` sumIDhits
# <int> <chr> <fct> <lgl> <lgl> <lgl> <lgl> <lgl> <dbl>
# 1 1 a m* TRUE FALSE TRUE FALSE TRUE 3
# 2 1 b m* TRUE FALSE TRUE FALSE TRUE 3