Hi @stevemidway and welcome!
Interesting problem. I approached this in 3 steps.
- Tabulating the number of times each favorite color was picked per group, in `df_summarise`.
- Creating a lookup table of possible color combinations, `combs_tabulated`, with a column `outcome` indicating which of the possible combinations of the 3 colors was used.
- Matching the lookup table to `df_summarise` and assigning the outcome to each group.
library("tidyverse")
# Example data: three groups (a, b, c) with three color picks each.
group <- rep(c("a", "b", "c"), each = 3)
color <- c(
  "red", "blue", "yellow",
  "blue", "blue", "blue",
  "yellow", "yellow", "red"
)
# Store both columns as factors.
df <- tibble(
  group = factor(group),
  color = factor(color)
)
# Tally how often each color was picked within every group: one row per group,
# one column per color, and 0 where a group never picked that color.
df_summarise <- pivot_wider(
  mutate(df, n = 1),
  id_cols = group,
  names_from = color,
  values_from = n,
  values_fn = sum,
  values_fill = 0
)
df_summarise
#> # A tibble: 3 × 4
#> group red blue yellow
#> <fct> <dbl> <dbl> <dbl>
#> 1 a 1 1 1
#> 2 b 0 3 0
#> 3 c 1 0 2
# Build a lookup table of every way a group can distribute its picks over the
# available colors; each row is one combination, labelled by `outcome`.
color_choices <- c("red", "yellow", "blue")
picks_per_group <- 3

# Enumerate the combinations with repetition, one per row. The number of
# distinct colors is taken from `color_choices` so the two cannot drift apart.
combs <- as.data.frame(gtools::combinations(
  n = length(color_choices),
  r = picks_per_group,
  v = color_choices,
  repeats.allowed = TRUE
)) %>%
  # seq_len() rather than seq(): it stays a valid index even for zero rows.
  mutate(outcome = seq_len(nrow(.)))

# Dummy-code the combinations: count how often each color occurs per outcome.
combs_tabulated <- combs %>%
  # Pivot every pick column (all but `outcome`) instead of hard-coding V1:V3.
  pivot_longer(cols = -outcome) %>%
  mutate(n = 1) %>%
  pivot_wider(
    id_cols = outcome, names_from = value, values_from = n,
    values_fn = sum, values_fill = 0
  )
combs_tabulated
#> # A tibble: 10 × 4
#> outcome blue red yellow
#> <int> <dbl> <dbl> <dbl>
#> 1 1 3 0 0
#> 2 2 2 1 0
#> 3 3 2 0 1
#> 4 4 1 2 0
#> 5 5 1 1 1
#> 6 6 1 0 2
#> 7 7 0 3 0
#> 8 8 0 2 1
#> 9 9 0 1 2
#> 10 10 0 0 3
# Look up each group's color counts in the table of combinations to find the
# outcome it corresponds to. The join keys are spelled out so the match is
# always made on exactly the three color-count columns, and no
# "Joining, by = ..." message is emitted.
group_categorized <- df_summarise %>%
  left_join(combs_tabulated, by = c("red", "blue", "yellow"))
group_categorized
#> # A tibble: 3 × 5
#> group red blue yellow outcome
#> <fct> <dbl> <dbl> <dbl> <int>
#> 1 a 1 1 1 5
#> 2 b 0 3 0 1
#> 3 c 1 0 2 9
Created on 2022-05-08 by the reprex package (v2.0.1)