dplyr sample with balanced subgroups

I have a dataset that looks like this:

# Toy data: five male subjects (a-e) and five female subjects (k-o); every
# subject ends up with one row per method (M1-M4), i.e. 40 rows in total.
sex <- rep(c("M", "F"), each = 20)
method <- rep(paste0("M", 1:4), times = 10)
values <- rnorm(n = 40)
subj <- c(rep_len(letters[1:5], 20), rep_len(letters[11:15], 20))
df <- data.frame(subj, sex, method, values)

I would like to sample 3 males and 3 females and, for each sampled individual, keep all the 'method' data. I tried, e.g.:

# NOTE(review): no data frame is supplied in this call; presumably `df` was
# meant to be passed (or piped in) as the first argument -- confirm.
group_by(sex, subj)

but this does not give me what I want. Am I on the right track here, or is there a better approach?
Thanks.

sex <- c(rep("M", 20), rep("F", 20))
method <- rep(c("M1", "M2", "M3", "M4"), 10)
values <- rnorm(40)
subj <- c(rep(letters[1:5], 4), rep(letters[11:15], 4))
dat <- data.frame(subj = subj, sex = sex, method = method, values = values)

# sample() applied to a data frame draws *columns*, not rows, so it cannot be
# used on the subset directly. Instead, draw 3 subject ids within each sex and
# then keep every row (i.e. every method) belonging to the drawn subjects.
# No seed is set, so the output varies between runs; one possible draw is
# shown for each call.
male_ids <- sample(unique(dat$subj[dat$sex == "M"]), 3)
dat[dat$subj %in% male_ids, ]
#>    subj sex method      values
#> 1     a   M     M1  2.20965251
#> 3     c   M     M3 -0.61762284
#> 4     d   M     M4  0.12128696
#> 6     a   M     M2 -1.86043917
#> 8     c   M     M4 -0.43938903
#> 9     d   M     M1  0.09396233
#> 11    a   M     M3  0.96422619
#> 13    c   M     M1  0.81207157
#> 14    d   M     M2 -1.30529445
#> 16    a   M     M4  0.52111344
#> 18    c   M     M2 -0.25202362
#> 19    d   M     M3  0.63510994
female_ids <- sample(unique(dat$subj[dat$sex == "F"]), 3)
dat[dat$subj %in% female_ids, ]
#>    subj sex method      values
#> 21    k   F     M1 -1.37005522
#> 23    m   F     M3  0.41332046
#> 24    n   F     M4  0.14799967
#> 26    k   F     M2 -0.75718008
#> 28    m   F     M4 -0.24647612
#> 29    n   F     M1 -1.51754735
#> 31    k   F     M3 -0.22494643
#> 33    m   F     M1  0.65227746
#> 34    n   F     M2 -1.00460222
#> 36    k   F     M4 -0.82446510
#> 38    m   F     M2  0.05563906
#> 39    n   F     M3  0.09552943

# better

#' Sample whole subjects from one group of a long-format data frame.
#'
#' @param w Data frame with one row per subject/measurement combination.
#' @param x Name (or index) of the grouping column, e.g. "sex".
#' @param y Value of column `x` identifying the group to sample from.
#' @param z Number of distinct subjects to draw (without replacement).
#' @param id Name (or index) of the subject-identifier column.
#' @return The rows of `w` that belong to the `z` sampled subjects, with all
#'   columns kept and the original row order preserved. Signals an error if
#'   the group contains fewer than `z` distinct subjects.
sample_group <- function(w, x, y, z, id = "subj") {
  # Rows with a missing group value never belong to the requested group.
  in_group <- !is.na(w[[x]]) & w[[x]] == y
  ids <- unique(w[[id]][in_group])
  if (z > length(ids)) {
    stop(
      "cannot sample ", z, " subjects: group has only ", length(ids),
      call. = FALSE
    )
  }
  # Index through sample.int(): sample(ids, z) would treat a single numeric
  # id as the range 1:id instead of as one candidate.
  picked <- ids[sample.int(length(ids), z)]
  w[in_group & w[[id]] %in% picked, , drop = FALSE]
}

sample_group(dat, "sex", "M", 3)
#>    subj sex method      values
#> 2     b   M     M2 -0.14203389
#> 4     d   M     M4  0.12128696
#> 5     e   M     M1 -0.52786274
#> 7     b   M     M3  1.41856714
#> 9     d   M     M1  0.09396233
#> 10    e   M     M2  0.28342576
#> 12    b   M     M4 -0.89691550
#> 14    d   M     M2 -1.30529445
#> 15    e   M     M3  1.92266475
#> 17    b   M     M1 -1.01972322
#> 19    d   M     M3  0.63510994
#> 20    e   M     M4  0.69183068

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.