I would like to use dplyr::summarize with a custom function that operates on groups within a data frame. Specifically, I want to create a custom rank based on the relative score of grouped entries. See example below:
library(dplyr)
# Example data: three groups (A, B, C) with three candidates each. The first
# row of every group is flagged as the correct entry.
# NOTE: rnorm() is called without a seed, so the scores differ on every run.
dat <- data.frame(
  grp = rep(LETTERS[1:3], each = 3),
  score = rnorm(n = 9),
  # Spell out TRUE/FALSE: T and F are ordinary variables that can be reassigned.
  correct = rep(c(TRUE, FALSE, FALSE), 3)
)
dat
#> grp score correct
#> 1 A -0.4197577 TRUE
#> 2 A -1.8704389 FALSE
#> 3 A 0.9077843 FALSE
#> 4 B 0.7294849 TRUE
#> 5 B -1.5251637 FALSE
#> 6 B -0.3363766 FALSE
#> 7 C -0.4401990 TRUE
#> 8 C 1.5558136 FALSE
#> 9 C -0.8925630 FALSE
# For each correct entry, determine its relative rank position (RRP) within
# its group:
#   RRP = 0.5 * (1 - (BC - WC) / (TC - 1))
#   BC: number of candidates whose score is higher than the correct entry's
#   WC: number of candidates whose score is lower than the correct entry's
#   TC: total number of candidates in the group
# Worked example for grp == "A" (no group_by() needed: filter() already
# restricts the rows to a single group).
with(
  dat %>%
    filter(grp == "A"),
  {
    # na.rm = TRUE so that NA comparisons are simply not counted.
    better <- sum(score > score[correct], na.rm = TRUE)
    worse <- sum(score < score[correct], na.rm = TRUE)
    0.5 * (1 - (better - worse) / (length(score) - 1))
  }
)
#> [1] 0.5
# Workaround: compute the RRP one group at a time, then attach the results to
# the rows holding each group's correct entry.
# vapply() is used instead of sapply() so the result is guaranteed to be a
# numeric vector with exactly one value per group.
vapply(
  unique(dat$grp),
  function(x_grp) {
    with(
      dat %>%
        filter(grp == !!x_grp),
      {
        # na.rm = TRUE so that NA comparisons are simply not counted.
        better <- sum(score > score[correct], na.rm = TRUE)
        worse <- sum(score < score[correct], na.rm = TRUE)
        0.5 * (1 - (better - worse) / (length(score) - 1))
      }
    )
  },
  FUN.VALUE = numeric(1)
) %>%
  # cbind() pairs values by position: this relies on every group having exactly
  # one correct row and on unique(dat$grp) listing the groups in the same order
  # as those rows appear in dat.
  cbind(dat[which(dat$correct), ], rrp = .)
#> grp score correct rrp
#> 1 A -0.4197577 TRUE 0.5
#> 4 B 0.7294849 TRUE 1.0
#> 7 C -0.4401990 TRUE 0.5
# What I want to do: compute the RRP for every group in a single summarize()
# call by handing the grouped columns to a custom function.

#' Relative rank position (RRP) of the correct entry among its candidates
#'
#' Computes `0.5 * (1 - (BC - WC) / (TC - 1))`, where BC / WC are the numbers
#' of candidates scoring higher / lower than the correct entry and TC is the
#' total number of candidates.
#'
#' @param score Numeric vector of candidate scores.
#' @param correct Logical vector of the same length as `score`; `TRUE` marks
#'   the correct entry.
#' @return A single numeric value. `NA_real_` when there is not exactly one
#'   correct entry, when there are fewer than two candidates (the formula
#'   would divide by zero), or when the correct entry's score is `NA`.
relative_rank_position <- function(score, correct) {
  stopifnot(length(score) == length(correct))
  correct_idx <- which(correct)
  if (length(correct_idx) != 1L || length(score) < 2L) {
    return(NA_real_)
  }
  ref_score <- score[correct_idx]
  if (is.na(ref_score)) {
    return(NA_real_)
  }
  # na.rm = TRUE so that candidates with an NA score count as neither better
  # nor worse.
  better <- sum(score > ref_score, na.rm = TRUE)
  worse <- sum(score < ref_score, na.rm = TRUE)
  0.5 * (1 - (better - worse) / (length(score) - 1))
}

# Inside summarize(), bare column names refer to the current group's values,
# so the function is called once per group and yields one row per group.
dat %>%
  group_by(grp) %>%
  summarize(rrp = relative_rank_position(score, correct))
Created on 2019-07-23 by the reprex package (v0.3.0)