Another solution, heavily influenced by the answer here. This is a bit faster if I use same data as Nir, but I haven't tested with larger data.
set.seed(seed = 118151)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(purrr)
library(tidyr)
player_data <- tibble(player = paste0("player", 1:20),
score = runif(n = 20,
min = 4,
max = 16.7),
index = runif(n = 20,
min = -1,
max = 9),
role = sample(x = rep.int(x = c("C", "F", "G"),
times = c(4, 8, 8))))
tic <- proc.time()
# based on the following thread
# https://forum.posit.co/t/create-all-possible-combinations-of-a-data-frame/26848
create_combinations <- function(all_data, player_role, player_count)
{
role_data <- all_data |>
filter(role == player_role) |>
select(!role)
role_rows <- role_data |>
group_by(across(.cols = everything())) |>
group_split()
column_names <- seq_len(length.out = player_count) |>
map(.f = ~ paste(player_role, names(x = role_data), .x,
sep = "_")) |>
flatten_chr()
role_data |>
nrow() |>
combn(m = player_count) |>
t() |>
`colnames<-`(value = seq_len(length.out = player_count)) |>
as_tibble() |>
mutate(across(.cols = everything(),
.fns = ~ map(.x = .x,
.f = ~ pluck(.x = role_rows,
.x)))) |>
unnest(cols = everything(),
names_repair = "minimal") |>
set_names(nm = column_names) |>
rowwise() |>
mutate("{player_role}_score" := sum(across(.cols = matches(match = "score"))),
"{player_role}_index" := sum(across(.cols = matches(match = "index")))) |>
ungroup()
}
roles <- c("C", "F", "G")
counts <- c(2, 4, 4)
role_combinations <- map2(.x = roles,
.y = counts,
.f = ~ create_combinations(all_data = player_data,
player_role = .x,
player_count = .y))
top_combinations <- role_combinations |>
reduce(.f = ~ full_join(x = .x,
y = .y,
by = character())) |>
mutate(team_score = C_score + F_score + G_score,
team_index = C_index + F_index + G_index) |>
filter(between(x = team_score,
left = 95.5,
right = 100.4)) |>
slice_max(order_by = team_index,
n = 10)
toc <- proc.time()
# top team
top_combinations |>
slice_head(n = 1) |>
select(matches(match = "player")) |>
t()
#> [,1]
#> C_player_1 "player15"
#> C_player_2 "player20"
#> F_player_1 "player12"
#> F_player_2 "player14"
#> F_player_3 "player16"
#> F_player_4 "player3"
#> G_player_1 "player11"
#> G_player_2 "player17"
#> G_player_3 "player4"
#> G_player_4 "player8"
toc - tic
#> user system elapsed
#> 0.660 0.008 0.669