In the code here, I don't consider the female case; you can adapt the male example to do that.
I also didn't use the full dataset, only a random sample of 20 observations.
library(dslabs)
library(tidyverse )
# Fix the RNG seed so the random sample below is reproducible.
set.seed(2)
# Print tibbles with 4 significant digits.
options(pillar.sigfig = 4)
# Work with a random sample of 20 observations rather than the full dataset,
# converted to a tibble for prettier printing.
short_h <- as_tibble(sample_n(dslabs::heights, size = 20))
# glimpse() gives a view of the data frame "on its side" (one line per column).
glimpse(short_h)
#Observations: 20
#Variables: 2
$ sex <fct> Male, Male, Male, Male, Male, Male, Male, Male, Male, Male, Male, Female, Male, Fem...
$ height <dbl> 66.00000, 68.00000, 61.00000, 66.92000, 70.00000, 71.00000, 67.00000, 70.00000, 68....
# Heights of the rows whose sex is "Male", as a plain numeric vector.
male_vec <- with(short_h, height[sex == "Male"])
male_vec
[1] 66.00000 68.00000 61.00000 66.92000 70.00000 71.00000 67.00000 70.00000 68.00000 66.92913 66.14173
[12] 71.00000 65.00000 67.00000 67.00000 69.00000 73.00000 62.99213
# Note: despite the "avg" in the name, this holds the median male height.
avg_male <- male_vec %>% median()
avg_male
[1] 67
# median(1:3) is 2, but median(1:2) is 1.5, a value that appears in neither input.
# So the median is not guaranteed to match any individual's height exactly, and
# looking for the first exact match would not always be possible.
# Instead we have to apply some measure of distance from the median.
# Keep only the male rows of the sample, this time as a data frame.
male_df <- filter(short_h, sex == "Male")
male_df
# A tibble: 18 x 2
sex height
<fct> <dbl>
1 Male 66
2 Male 68
3 Male 61
4 Male 66.92
5 Male 70
6 Male 71
7 Male 67
8 Male 70
9 Male 68
10 Male 66.93
11 Male 66.14
12 Male 71
13 Male 65
14 Male 67
15 Male 67
16 Male 69
17 Male 73
18 Male 62.99
# Record each row's position (rnum) and the absolute distance of its height
# from the median computed above.
male_df2 <- mutate(
  male_df,
  rnum = row_number(),
  abs_dist_from_avg = abs(height - avg_male)
)
male_df2
# A tibble: 18 x 4
sex height rnum abs_dist_from_avg
<fct> <dbl> <int> <dbl>
1 Male 66 1 1
2 Male 68 2 1
3 Male 61 3 6
4 Male 66.92 4 0.08000
5 Male 70 5 3
6 Male 71 6 4
7 Male 67 7 0
8 Male 70 8 3
9 Male 68 9 1
10 Male 66.93 10 0.07087
11 Male 66.14 11 0.8583
12 Male 71 12 4
13 Male 65 13 2
14 Male 67 14 0
15 Male 67 15 0
16 Male 69 16 2
17 Male 73 17 6
18 Male 62.99 18 4.008
# Sort so the rows closest to the median come first; ties in distance are
# broken by the original row position.
male_df3 <- male_df2 %>%
  arrange(abs_dist_from_avg, rnum)
male_df3
# A tibble: 18 x 4
sex height rnum abs_dist_from_avg
<fct> <dbl> <int> <dbl>
1 Male 67 7 0
2 Male 67 14 0
3 Male 67 15 0
4 Male 66.93 10 0.07087
5 Male 66.92 4 0.08000
6 Male 66.14 11 0.8583
7 Male 66 1 1
8 Male 68 2 1
9 Male 68 9 1
10 Male 65 13 2
11 Male 69 16 2
12 Male 70 5 3
13 Male 70 8 3
14 Male 71 6 4
15 Male 71 12 4
16 Male 62.99 18 4.008
17 Male 61 3 6
18 Male 73 17 6
# Take the top row of the sorted data: the earliest row that is closest
# to the median.
male_df4 <- male_df3 %>% head(n = 1)
male_df4
# A tibble: 1 x 4
sex height rnum abs_dist_from_avg
<fct> <dbl> <int> <dbl>
1 Male 67 7 0
# Extract the single row number (rnum) from that one-row tibble.
avg_male_position <- male_df4[["rnum"]]
avg_male_position
[1] 7
# Go back to male_df2 and add the final flags: whether each height is at or
# above the median, and whether each row sits at or after the chosen position.
final_male_df <- mutate(
  male_df2,
  height_ge_median = height >= avg_male,
  row_ge_avg_pos = rnum >= avg_male_position
)
final_male_df
# A tibble: 18 x 6
sex height rnum abs_dist_from_avg height_ge_median row_ge_avg_pos
<fct> <dbl> <int> <dbl> <lgl> <lgl>
1 Male 66 1 1 FALSE FALSE
2 Male 68 2 1 TRUE FALSE
3 Male 61 3 6 FALSE FALSE
4 Male 66.92 4 0.08000 FALSE FALSE
5 Male 70 5 3 TRUE FALSE
6 Male 71 6 4 TRUE FALSE
7 Male 67 7 0 TRUE TRUE
8 Male 70 8 3 TRUE TRUE
9 Male 68 9 1 TRUE TRUE
10 Male 66.93 10 0.07087 FALSE TRUE
11 Male 66.14 11 0.8583 FALSE TRUE
12 Male 71 12 4 TRUE TRUE
13 Male 65 13 2 FALSE TRUE
14 Male 67 14 0 TRUE TRUE
15 Male 67 15 0 TRUE TRUE
16 Male 69 16 2 TRUE TRUE
17 Male 73 17 6 TRUE TRUE
18 Male 62.99 18 4.008 FALSE TRUE