Get only orders where 2 products are bought within

It would be easier to help you if you provide a REPRoducible EXample (reprex), but here is an example of a possible solution.

library(dplyr)
library(stringr)

set.seed(1)
sample_data <- data.frame(transaction_id = sample(1:3, 7, replace = TRUE),
                          product = sample(c("X", "Y", "Z"), 7, replace = TRUE))

sample_data %>% 
    arrange(transaction_id)
#>   transaction_id product
#> 1              1       Y
#> 2              1       Z
#> 3              1       X
#> 4              2       Z
#> 5              3       Y
#> 6              3       X
#> 7              3       X

sample_data %>% 
    arrange(transaction_id) %>% 
    group_by(transaction_id) %>% 
    summarise(product = paste(product, collapse = ",")) %>% 
    filter(str_detect(product, "[XY]{1}.*[XY]{1}"))
#> # A tibble: 2 x 2
#>   transaction_id product
#>            <int> <chr>  
#> 1              1 Y,Z,X  
#> 2              3 Y,X,X

Created on 2019-10-17 by the reprex package (v0.3.0.9000)

2 Likes