So there are likely a number of ways to set this up, but here's what my approach would be:
Step 1: rearrange the data such that...
- Columns represent the advertisements
- Each row represents a group/name entry
- Cells have a value of 1 if Adv_i was binned in the corresponding group, otherwise 0.
library(tidyverse)
# Long-format example data: each row records that the advertisement in
# `Value` was binned into the group in `Groups` for the entry in `Name`.
AdvPreTest <- tribble(
  ~Name,   ~Groups,  ~Value,
  "AB282", "group1", "Adv1",
  "AB282", "group1", "Adv2",
  "AB282", "group1", "Adv3",
  "AB282", "group2", "Adv4",
  "AB282", "group2", "Adv5",
  "AB20",  "group3", "Adv1",
  "AB20",  "group3", "Adv2",
  "AB20",  "group2", "Adv3",
  "AB20",  "group2", "Adv4",
  "AB20",  "group2", "Adv5",
  "LM28",  "group3", "Adv1",
  "LM28",  "group3", "Adv2",
  "LM28",  "group3", "Adv3",
  "LM28",  "group2", "Adv4",
  "LM28",  "group2", "Adv5",
  "GM25",  "group2", "Adv1",
  "GM25",  "group2", "Adv2",
  "GM25",  "group2", "Adv3",
  "GM25",  "group1", "Adv4",
  "GM25",  "group1", "Adv5"
)
# Reshape to wide format: one row per Name/Groups pair and one column per
# advertisement, holding 1 where the ad was binned in that group and 0 elsewhere.
AdvClean <- AdvPreTest %>%
  # flag every observed (Name, Groups, Value) row with a 1
  mutate(dummy = 1) %>%
  # spread the ads across columns; combinations that never occur are filled with 0
  pivot_wider(
    names_from = Value,
    values_from = dummy,
    values_fill = list(dummy = 0)
  )

AdvClean
#> # A tibble: 8 x 7
#> Name Groups Adv1 Adv2 Adv3 Adv4 Adv5
#> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 AB282 group1 1 1 1 0 0
#> 2 AB282 group2 0 0 0 1 1
#> 3 AB20 group3 1 1 0 0 0
#> 4 AB20 group2 0 0 1 1 1
#> 5 LM28 group3 1 1 1 0 0
#> 6 LM28 group2 0 0 0 1 1
#> 7 GM25 group2 1 1 1 0 0
#> 8 GM25 group1 0 0 0 1 1
Step 2: initialize a matrix to hold the co-occurrences
# Keep only the advertisement columns. Selecting by name prefix (instead of
# the hard-coded range Adv1:Adv5) works for any number of ads and does not
# depend on the order in which pivot_wider() happened to create the columns.
AdvClean <- select(AdvClean, starts_with("Adv"))

# Number of advertisements; this is the dimension of the square matrix below.
M <- ncol(AdvClean)

# M x M matrix, initially all NA, with rows and columns labelled by ad name.
co_occur <- matrix(
  nrow = M, ncol = M,
  dimnames = list(
    names(AdvClean),
    names(AdvClean)
  )
)

co_occur
#> Adv1 Adv2 Adv3 Adv4 Adv5
#> Adv1 NA NA NA NA NA
#> Adv2 NA NA NA NA NA
#> Adv3 NA NA NA NA NA
#> Adv4 NA NA NA NA NA
#> Adv5 NA NA NA NA NA
Step 3: use a for loop to tabulate the results
# Fill co_occur[i, j] with the number of rows (Name/Groups pairs) in which
# ad i and ad j are both flagged with 1. seq_len(M) is used instead of 1:M so
# that the loops are skipped, rather than run over c(1, 0), when M is 0.
for (i in seq_len(M)) {
  for (j in seq_len(M)) {
    # TRUE for each row where both ad i and ad j were binned together
    grouped_together <- AdvClean[[i]] == 1 & AdvClean[[j]] == 1
    # summing the logical vector counts the TRUEs, i.e. the co-occurrences
    co_occur[i, j] <- sum(grouped_together)
  }
}

co_occur
#> Adv1 Adv2 Adv3 Adv4 Adv5
#> Adv1 4 4 3 0 0
#> Adv2 4 4 3 0 0
#> Adv3 3 3 4 1 1
#> Adv4 0 0 1 4 4
#> Adv5 0 0 1 4 4
I see you're a new poster, so if you're also new to R this might look like a lot is going on. Let me know if anything needs clarification!