Finding the best possible combinations based on multiple conditions with R dplyr

Another solution, heavily influenced by the answer here. This is a bit faster if I use same data as Nir, but I haven't tested with larger data.

set.seed(seed = 118151)

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(purrr)
library(tidyr)

player_data <- tibble(player = paste0("player", 1:20),
                      score = runif(n = 20,
                                    min = 4,
                                    max = 16.7),
                      index = runif(n = 20,
                                    min = -1,
                                    max = 9),
                      role = sample(x = rep.int(x = c("C", "F", "G"),
                                                times = c(4, 8, 8))))

tic <- proc.time()

# based on the following thread
# https://forum.posit.co/t/create-all-possible-combinations-of-a-data-frame/26848
create_combinations <- function(all_data, player_role, player_count)
{
    role_data <- all_data |>
        filter(role == player_role) |>
        select(!role)
    role_rows <- role_data |>
        group_by(across(.cols = everything())) |>
        group_split()
    column_names <- seq_len(length.out = player_count) |>
        map(.f = ~ paste(player_role, names(x = role_data), .x,
                         sep = "_")) |>
        flatten_chr()
    role_data |>
        nrow() |>
        combn(m = player_count) |>
        t() |>
        `colnames<-`(value = seq_len(length.out = player_count)) |>
        as_tibble() |>
        mutate(across(.cols = everything(),
                      .fns = ~ map(.x = .x,
                                   .f = ~ pluck(.x = role_rows,
                                                .x)))) |>
        unnest(cols = everything(),
               names_repair = "minimal") |>
        set_names(nm = column_names) |>
        rowwise() |>
        mutate("{player_role}_score" := sum(across(.cols = matches(match = "score"))),
               "{player_role}_index" := sum(across(.cols = matches(match = "index")))) |>
        ungroup()
}

roles <- c("C", "F", "G")
counts <- c(2, 4, 4)

role_combinations <- map2(.x = roles,
                          .y = counts,
                          .f = ~ create_combinations(all_data = player_data,
                                                     player_role = .x,
                                                     player_count = .y))

top_combinations <- role_combinations |>
    reduce(.f = ~ full_join(x = .x,
                            y = .y,
                            by = character())) |>
    mutate(team_score = C_score + F_score + G_score,
           team_index = C_index + F_index + G_index) |>
    filter(between(x = team_score,
                   left = 95.5,
                   right = 100.4)) |>
    slice_max(order_by = team_index,
              n = 10)

toc <- proc.time()

# top team
top_combinations |>
    slice_head(n = 1) |>
    select(matches(match = "player")) |>
    t()
#>            [,1]      
#> C_player_1 "player15"
#> C_player_2 "player20"
#> F_player_1 "player12"
#> F_player_2 "player14"
#> F_player_3 "player16"
#> F_player_4 "player3" 
#> G_player_1 "player11"
#> G_player_2 "player17"
#> G_player_3 "player4" 
#> G_player_4 "player8"

toc - tic
#>    user  system elapsed 
#>   0.660   0.008   0.669
1 Like