Collapse intersection using the same vector list?

Hello,
I'm in a strange situation, and I'm not sure I can explain it clearly.
I hope you can give me a hand.

# Indicator vectors, one entry per observation (35 in total): 1 = event
# present, 0 = absent. Only the first 16 observations contain any 1s, so the
# all-zero tail of 19 observations is written with rep().
a1 <- c(1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, rep(0, 19))
a2 <- c(1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, rep(0, 19))
a3 <- c(1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, rep(0, 19))
# Row-wise sum of a1, a2 and a3.
a_total <- c(3, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 2, 2, rep(0, 19))
dataz <- data.frame(a1, a2, a3, a_total)

# Hand-built target table: the number of observations for each combination of
# indicators, with the combination label stored as the row name.
listX <- c(
  "no_values", "a1", "a2", "a3",
  "a1_and_a2", "a1_and_a3", "a2_and_a3", "a1_a2_a3"
)
chiero <- data.frame(events_ = c(20, 2, 5, 3, 0, 0, 3, 2), row.names = listX)
chiero

I sum the columns a1, a2, and a3 to produce a_total. But after that, I don't know how to produce the chiero data frame, which counts every possible scenario. I mean, I need to produce a count for every possible combination of successes. A success is when at least one of the columns has the value 1, and a failure is when a1 to a3 are all zero.

Imagine doing that with 8 or more columns...
Thanks for your help and time.

If you're open to a tidyverse solution, I think this is quite elegant.

library(dplyr, warn.conflicts = FALSE)
library(tidyr)

# Example data: three 0/1 indicator columns with 35 observations each. Only
# the first 16 observations contain any 1s; the remaining 19 are all zero.
dataz <- data.frame(
  a1 = c(1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, rep(0, 19)),
  a2 = c(1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, rep(0, 19)),
  a3 = c(1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, rep(0, 19))
)

# Innermost call first: unite() pastes a1, a2 and a3 into one pattern string
# per row (e.g. "011"), count() tallies the rows for each pattern, and
# recode() swaps each pattern string for a readable label.
mutate(
  count(unite(dataz, a_comb, a1:a3, sep = ""), a_comb),
  a_comb = recode(
    a_comb,
    `000` = "no values",
    `100` = "a1",
    `010` = "a2",
    `001` = "a3",
    `110` = "a1_a2",
    `101` = "a1_a3",
    `011` = "a2_a3",
    `111` = "a1_a2_a3"
  )
)
#>      a_comb  n
#> 1 no values 20
#> 2        a3  3
#> 3        a2  5
#> 4     a2_a3  3
#> 5        a1  2
#> 6  a1_a2_a3  2

Created on 2020-09-07 by the reprex package (v0.3.0)

The recode part won't scale well with 8 input columns as it would be tedious to manually define each possible combination so I'd suggest leaving it as "000", "001" etc.

1 Like

This is best thought of in terms of a matrix-like object of binary values in which each row of the matrix represents a number in base 2. For n columns, there are 2^n distinct rows. For n = 3, that is 8 and for n = 8, 256.

In the general case, a problem in R should be thought of as finding a solution to f(x) = y. f, x, and y are all objects, and those objects, including the function f, may be composed of other objects.

Considering dataz as a matrix, we have x. For y we seek a data frame containing two columns, call them set and n, where set is the base-2 number that corresponds to the presence or absence of some attribute in x and n is the number of occurrences.

For example, in the dataz example, a row of x evaluating to 4 (binary 100) indicates the value of 1 for a1 and values of 0 for a2 and a3. For the case in which there are a1 ... a8, a row evaluating to 128 (binary 10000000) is comparable.

To obtain the tabulation, dplyr provides summarize() and count()

suppressPackageStartupMessages(library(dplyr))
# Indicator vectors, one entry per observation (35 in total): 1 = event
# present, 0 = absent. The last 19 observations are all zero.
a1 <- c(1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, rep(0, 19))
a2 <- c(1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, rep(0, 19))
a3 <- c(1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, rep(0, 19))
# Row-wise sum of a1, a2 and a3.
a_total <- c(3, 0, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 2, 2, rep(0, 19))
dataz <- data.frame(a1, a2, a3, a_total)

# Innermost call first: drop the precomputed total, group by the three
# indicators, then count the rows in each group. Only combinations that occur
# in the data appear, and the result keeps the a1/a2/a3 grouping.
count(group_by(select(dataz, -a_total), a1, a2, a3))
#> # A tibble: 6 x 4
#> # Groups:   a1, a2, a3 [6]
#>      a1    a2    a3     n
#>   <dbl> <dbl> <dbl> <int>
#> 1     0     0     0    20
#> 2     0     0     1     3
#> 3     0     1     0     5
#> 4     0     1     1     3
#> 5     1     0     0     2
#> 6     1     1     1     2

Created on 2020-09-07 by the reprex package (v0.3.0)

Other transformations depend on the details of representation of the results.

I wanted to take technocrat's solution and use combinatorics to work out the a1 a2 a3 combinations.
You will see that these are defined in my vector named basis

library(tidyverse)
library(rlang)
# Example data: three 0/1 indicator columns plus their row-wise total.
dataz <- data.frame(
  a1 = c(1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
  a2 = c(1,0,0,1,1,1,1,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
  a3 = c(1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
  a_total = c(3,0,1,2,1,1,1,1,1,1,1,1,1,3,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
)

# Names of the 0/1 indicator columns to combine. Every later step is derived
# from this vector, so handling more columns only requires extending it.
basis <- c("a1", "a2", "a3")

# Count the rows for each combination of indicator values that occurs in the
# data. The columns come from `basis` rather than being spelled out by hand.
# The outer parentheses print the result as well as assigning it.
(dataz2 <- dataz %>%
  select(all_of(basis)) %>%
  group_by(!!!syms(basis)) %>%
  count())

# Every non-empty subset of `basis`: all subsets of size 1, then size 2, and
# so on up to length(basis). map() returns one list per subset size and
# flatten() merges them into a single list of character vectors.
(find_combs <- purrr::map(
  seq_along(basis),
  ~ combn(x = basis, m = ., simplify = FALSE)
) %>%
  flatten())

# Prepend "" as a stand-in for the empty subset (no indicator set).
find_combs <- append(find_combs, "", after = 0)

# For each subset, the indicators that must be 0: everything not in the subset.
(anti_combs <- map(find_combs, ~ setdiff(basis, .)))

# Readable label for each subset, e.g. "a1_and_a2"; the empty subset gives "".
(nice_combs_names <- map_chr(
  find_combs,
  ~ paste0(., collapse = "_and_")
))

# Build one filter condition per subset as text, e.g. "a1 & !(a2 | a3)":
# all indicators in the subset are set and none of the others are.
# Plain if/else is used because both tests are scalar.
(combs_logicals <- map2_chr(
  find_combs, anti_combs,
  function(present, absent) {
    present_part <- if (present[[1]] != "") {
      paste0(present, collapse = " & ")
    } else {
      "TRUE" # empty subset: no positive requirement
    }
    absent_part <- if (length(absent) > 0) {
      paste0(" & !(", paste0(absent, collapse = " | "), ")")
    } else {
      "" # full subset: nothing has to be excluded
    }
    paste0(present_part, absent_part)
  }
))

# Filter on the previously made dataz2: parse each condition string into an
# expression, inject it into filter() with !!, and pull out the matching count.
res1 <- map(combs_logicals, function(cond) {
  ungroup(dataz2) %>%
    filter(!!parse_expr(cond)) %>%
    pull(n)
})

# Combinations that never occur have no row in dataz2, so they yield a
# zero-length result; report those as a count of 0.
res2 <- map_int(res1, ~ if (length(.) > 0) . else 0L)
names(res2) <- nice_combs_names

enframe(res2)
# A tibble: 8 x 2
  name               value
  <chr>              <int>
1 ""                    20
2 "a1"                   2
3 "a2"                   5
4 "a3"                   3
5 "a1_and_a2"            0
6 "a1_and_a3"            0
7 "a2_and_a3"            3
8 "a1_and_a2_and_a3"     2
2 Likes

How does the result get to a tibble with name generated programmatically?

library(tidyverse)
library(rlang)
#> 
#> Attaching package: 'rlang'
#> The following objects are masked from 'package:purrr':
#> 
#>     %@%, as_function, flatten, flatten_chr, flatten_dbl, flatten_int,
#>     flatten_lgl, flatten_raw, invoke, list_along, modify, prepend,
#>     splice
# Example data: three 0/1 indicator columns plus their row-wise total.
dataz <- data.frame(
  a1 = c(1,0,1,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
  a2 = c(1,0,0,1,1,1,1,0,0,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
  a3 = c(1,0,0,1,0,0,0,1,1,0,1,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0),
  a_total = c(3,0,1,2,1,1,1,1,1,1,1,1,1,3,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
)

# Innermost call first: drop the total, group by the three indicators, then
# count the rows in each group. The outer parentheses print the result as well
# as assigning it.
(dataz2 <- count(group_by(select(dataz, -a_total), a1, a2, a3)))
#> # A tibble: 6 x 4
#> # Groups:   a1, a2, a3 [6]
#>      a1    a2    a3     n
#>   <dbl> <dbl> <dbl> <int>
#> 1     0     0     0    20
#> 2     0     0     1     3
#> 3     0     1     0     5
#> 4     0     1     1     3
#> 5     1     0     0     2
#> 6     1     1     1     2


# Names of the indicator columns; the subsets below are generated from it.
basis <- c("a1", "a2", "a3")

# Every non-empty subset of `basis`, ordered by size. map() returns one list
# per subset size and flatten() merges them into a single list of character
# vectors. seq_along(basis) replaces the hard-coded 1:3 so the subset sizes
# follow the length of `basis`.
(find_combs <- purrr::map(
  seq_along(basis),
  ~ combn(x = basis, m = ., simplify = FALSE)
) %>%
  flatten())
#> [[1]]
#> [1] "a1"
#> 
#> [[2]]
#> [1] "a2"
#> 
#> [[3]]
#> [1] "a3"
#> 
#> [[4]]
#> [1] "a1" "a2"
#> 
#> [[5]]
#> [1] "a1" "a3"
#> 
#> [[6]]
#> [1] "a2" "a3"
#> 
#> [[7]]
#> [1] "a1" "a2" "a3"

Created on 2020-09-07 by the reprex package (v0.3.0)

I paste them together with _and_ characters and save the result to nice_combs_names

1 Like

I would like to thank you, guys.
I tried both methods, and they were easy to run.
Thanks again for your time and replies.
I'll declare this thread as SOLVED

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.