Find an index of a vector

Hi, how can I find an index in a vector, to know an element's position?

ex: I have an unordered vector with more than 1000 elements. I find the mean of this vector and want to know its position.

It seems likely to me that once you calculate the mean of the vector, you might find that none, or all, of the members of the vector equal that mean value. What should be returned in such cases?
Generally, though, the which() function is a fine approach to looking up the index of a member of a vector. For example:

# which() returns the positions of ALL elements for which the condition is TRUE.
my_vec <- c(1, 100, 99, 101, 100)
which(my_vec == 99)   # 3
which(my_vec == 100)  # 2 5 -- every matching position is returned, not only the first
match(100, my_vec)    # 2   -- use match() when only the first matching position is wanted
1 Like

Hi, I'm trying to get all numbers above the mean, and all numbers above the index of that mean, but it's not working. If you know how to do it better, I'm learning :slight_smile:

# Asker's attempt, kept exactly as posted: split the heights by sex, average
# each group, then try to look up the position of that average in the group.
library(dslabs)
data("heights")
male <- heights$height[heights$sex=="Male"]
female <- heights$height[heights$sex=="Female"]
avg_male <- mean(male)
avg_female <- mean(female)
# NOTE(review): `==` is an exact comparison, so this only finds a position if
# some element of `male` is exactly equal to the computed mean.
ind_male <- which(male == avg_male)
# as.integer() drops the fractional part of the female average.
avg_female <- as.integer(avg_female)
ind_male <- which(male == avg_male) #not working
# NOTE(review): `avh_female` is never assigned in this snippet -- presumably a
# typo for `avg_female`.
ind_female <- which(female==avh_female) #not working
# NOTE(review): each of the four lines below opens `c(` without a matching
# `)`, so they do not parse as written.
abv_avg_male <- c(sort(male)[ind_male:length(male)]
abv_avg_female <- c(sort(female)[ind_female:length(female)]
abv_ind_male <- c(male[ind_male:length(male)]
abv_ind_female <- c(female[ind_female:length(female)]

I understand wanting to get all values that are above the mean, but I'm not sure what it means that you also want the index to be above it, given that you have unordered data. Do you intend to first order your data, so that the values above the average will also be above the index?

1 Like

No, I want to get the unordered data above the mean and the ordered data above the mean too, to understand how to do it. But in this case, the mean takes the two middle numbers and divides by 2. I don't know if I can do it without a conditional expression.

Oops, forget what I said about the mean; I was thinking about the median. I guess it would work better with the median.

In the code here, I don't consider the female case; you can adapt the male example to do that.
I also didn't use the full dataset, but a random sample of 20.

  # Setup: load the packages, fix the RNG seed so the sample is reproducible,
  # and ask pillar to print 4 significant digits in tibbles.
  library(dslabs)
  library(tidyverse)
  set.seed(2)
  options(pillar.sigfig = 4)

  # Work with a random sample of 20 observations rather than the full dataset,
  # converted to a tibble for prettier printing.
  short_h <- as_tibble(sample_n(dslabs::heights, size = 20))

  # glimpse() gives a view of the data frame "on its side".
  glimpse(short_h)

  #Observations: 20
  #Variables: 2
  $ sex    <fct> Male, Male, Male, Male, Male, Male, Male, Male, Male, Male, Male, Female, Male, Fem...
  $ height <dbl> 66.00000, 68.00000, 61.00000, 66.92000, 70.00000, 71.00000, 67.00000, 70.00000, 68....
   
   # Heights of the male rows only, as a plain numeric vector.
   male_vec <- short_h[["height"]][short_h[["sex"]] == "Male"]
   male_vec
  [1] 66.00000 68.00000 61.00000 66.92000 70.00000 71.00000 67.00000 70.00000 68.00000 66.92913 66.14173
  [12] 71.00000 65.00000 67.00000 67.00000 69.00000 73.00000 62.99213
    
      
   # Despite its name, avg_male holds the median of the male heights.
   avg_male <- median(x = male_vec)
   avg_male
  [1] 67
   
    
    # median(1:3) is 2, which is an element of the vector, but the median of
    # c(1, 2) is 1.5, which is not. So looking for the first element that
    # exactly equals the median is not always possible; instead we rank the
    # elements by their distance from it.
    male_df <- filter(short_h, sex == "Male")
    male_df
  # A tibble: 18 x 2
  sex   height
  <fct>  <dbl>
    1 Male   66   
  2 Male   68   
  3 Male   61   
  4 Male   66.92
  5 Male   70   
  6 Male   71   
  7 Male   67   
  8 Male   70   
  9 Male   68   
  10 Male   66.93
  11 Male   66.14
  12 Male   71   
  13 Male   65   
  14 Male   67   
  15 Male   67   
  16 Male   69   
  17 Male   73   
  18 Male   62.99
   # Record each row's original position and its absolute distance from the
   # median stored in avg_male.
   male_df2 <- mutate(
     male_df,
     rnum = row_number(),
     abs_dist_from_avg = abs(height - avg_male)
   )
   male_df2
  # A tibble: 18 x 4
  sex   height  rnum abs_dist_from_avg
  <fct>  <dbl> <int>             <dbl>
    1 Male   66        1           1      
  2 Male   68        2           1      
  3 Male   61        3           6      
  4 Male   66.92     4           0.08000
  5 Male   70        5           3      
  6 Male   71        6           4      
  7 Male   67        7           0      
  8 Male   70        8           3      
  9 Male   68        9           1      
  10 Male   66.93    10           0.07087
  11 Male   66.14    11           0.8583 
  12 Male   71       12           4      
  13 Male   65       13           2      
  14 Male   67       14           0      
  15 Male   67       15           0      
  16 Male   69       16           2      
  17 Male   73       17           6      
  18 Male   62.99    18           4.008  
  # Closest-to-median rows first; ties are broken by original row number.
  male_df3 <- male_df2 %>%
    arrange(abs_dist_from_avg, rnum)
  male_df3
  # A tibble: 18 x 4
  sex   height  rnum abs_dist_from_avg
  <fct>  <dbl> <int>             <dbl>
    1 Male   67        7           0      
  2 Male   67       14           0      
  3 Male   67       15           0      
  4 Male   66.93    10           0.07087
  5 Male   66.92     4           0.08000
  6 Male   66.14    11           0.8583 
  7 Male   66        1           1      
  8 Male   68        2           1      
  9 Male   68        9           1      
  10 Male   65       13           2      
  11 Male   69       16           2      
  12 Male   70        5           3      
  13 Male   70        8           3      
  14 Male   71        6           4      
  15 Male   71       12           4      
  16 Male   62.99    18           4.008  
  17 Male   61        3           6      
  18 Male   73       17           6      
    # Keep only the top row, i.e. the first row after sorting by distance.
    male_df4 <- male_df3 %>%
      head(n = 1)
    male_df4
  # A tibble: 1 x 4
  sex   height  rnum abs_dist_from_avg
  <fct>  <dbl> <int>             <dbl>
    1 Male      67     7                 0
   # Extract the single index / row number (rnum) from that one-row tibble.
   avg_male_position <- male_df4 %>%
     pull(rnum)
   avg_male_position
  [1] 7
  
    # Back on male_df2, add the two final flags: is the height at or above the
    # median, and is the row at or after the position found above?
    final_male_df <- mutate(
      male_df2,
      height_ge_median = height >= avg_male,
      row_ge_avg_pos = rnum >= avg_male_position
    )
    final_male_df
  # A tibble: 18 x 6
  sex   height  rnum abs_dist_from_avg height_ge_median row_ge_avg_pos
  <fct>  <dbl> <int>             <dbl> <lgl>            <lgl>         
  1 Male   66        1           1       FALSE            FALSE         
  2 Male   68        2           1       TRUE             FALSE         
  3 Male   61        3           6       FALSE            FALSE         
  4 Male   66.92     4           0.08000 FALSE            FALSE         
  5 Male   70        5           3       TRUE             FALSE         
  6 Male   71        6           4       TRUE             FALSE         
  7 Male   67        7           0       TRUE             TRUE          
  8 Male   70        8           3       TRUE             TRUE          
  9 Male   68        9           1       TRUE             TRUE          
  10 Male   66.93    10           0.07087 FALSE            TRUE          
  11 Male   66.14    11           0.8583  FALSE            TRUE          
  12 Male   71       12           4       TRUE             TRUE          
  13 Male   65       13           2       FALSE            TRUE          
  14 Male   67       14           0       TRUE             TRUE          
  15 Male   67       15           0       TRUE             TRUE          
  16 Male   69       16           2       TRUE             TRUE          
  17 Male   73       17           6       TRUE             TRUE          
  18 Male   62.99    18           4.008   FALSE            TRUE

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.