Describing survey results with unequal number of observations

jkdby · April 13, 2022, 8:05pm

Hello,

I am trying to generate very simple general descriptive statistics on how children rated the taste of a drink. Responses on the survey question range from 0 to 10 but it is getting complicated because each child answered a different number of surveys across the course of a week long period.

For each participant, I would like to get the percentage of times they used each response (e.g. 1: 10%, 2: 0%, 3: 20%, etc.).

I was thinking of something along these lines, but something that actually works!

  group_by(subject_id) %>% 
  summarize(endorse_10 = count(SUGARY_DRINK_TASTE == 10)/length())

Can anyone advise?

For a sample of my dataset please see below:

# Sample of the survey data: one row per completed survey, stored as a tibble
# that is already grouped by participant (subject_id).
taste <- local({
  ids <- c(28053, 28054, 28056, 28057, 28058)

  # Ratings in row order; each line holds one participant's surveys.
  ratings <- c(
    10, 0, 10, 10, 10,                                          # 28053
    9, 10, 1,                                                   # 28054
    9, 10, 10, 6, 10, 8, 10, 10, 4,                             # 28056
    8, 7, 8, 7, 8, 9, 9, 9, 9, 7, 10, 10, 0, 7, 8, 7, 10, 8,    # 28057
    5, 10, 8, 8, 10, 10, 10, 10, 6, 7, 10, 10, 6, 10, 10        # 28058
  )

  # Grouping metadata: which rows belong to which participant.
  group_rows <- structure(
    list(1:5, 6:8, 9:17, 18:35, 36:50),
    ptype = integer(0),
    class = c("vctrs_list_of", "vctrs_vctr", "list")
  )
  group_data <- structure(
    list(subject_id = ids, .rows = group_rows),
    class = c("tbl_df", "tbl", "data.frame"),
    row.names = c(NA, -5L),
    .drop = TRUE
  )

  structure(
    list(
      # Participants completed 5, 3, 9, 18 and 15 surveys respectively.
      subject_id = rep(ids, times = c(5, 3, 9, 18, 15)),
      SUGARY_DRINK_TASTE = ratings
    ),
    class = c("grouped_df", "tbl_df", "tbl", "data.frame"),
    row.names = c(NA, -50L),
    groups = group_data
  )
})

FJCC · April 13, 2022, 9:09pm

Is this what you are trying to get?

# Sample survey data from the question: one row per completed survey, stored
# as a tibble that is already grouped by participant (subject_id).
taste <- local({
  ids <- c(28053, 28054, 28056, 28057, 28058)

  # Ratings in row order; each line holds one participant's surveys.
  ratings <- c(
    10, 0, 10, 10, 10,                                          # 28053
    9, 10, 1,                                                   # 28054
    9, 10, 10, 6, 10, 8, 10, 10, 4,                             # 28056
    8, 7, 8, 7, 8, 9, 9, 9, 9, 7, 10, 10, 0, 7, 8, 7, 10, 8,    # 28057
    5, 10, 8, 8, 10, 10, 10, 10, 6, 7, 10, 10, 6, 10, 10        # 28058
  )

  # Grouping metadata: which rows belong to which participant.
  group_rows <- structure(
    list(1:5, 6:8, 9:17, 18:35, 36:50),
    ptype = integer(0),
    class = c("vctrs_list_of", "vctrs_vctr", "list")
  )
  group_data <- structure(
    list(subject_id = ids, .rows = group_rows),
    class = c("tbl_df", "tbl", "data.frame"),
    row.names = c(NA, -5L),
    .drop = TRUE
  )

  structure(
    list(
      # Participants completed 5, 3, 9, 18 and 15 surveys respectively.
      subject_id = rep(ids, times = c(5, 3, 9, 18, 15)),
      SUGARY_DRINK_TASTE = ratings
    ),
    class = c("grouped_df", "tbl_df", "tbl", "data.frame"),
    row.names = c(NA, -50L),
    groups = group_data
  )
})
library(dplyr)

# Count how many times each participant gave each rating.
COUNTS <- taste %>%
  group_by(subject_id, SUGARY_DRINK_TASTE) %>%
  count()

# Regroup by participant only, so that sum(n) is that participant's total
# number of surveys and n / sum(n) is the share of surveys with each rating.
Frac <- COUNTS %>%
  group_by(subject_id) %>%
  mutate(Frac = n / sum(n))

Frac
#> # A tibble: 20 × 4
#> # Groups:   subject_id [5]
#>    subject_id SUGARY_DRINK_TASTE     n   Frac
#>         <dbl>              <dbl> <int>  <dbl>
#>  1      28053                  0     1 0.2   
#>  2      28053                 10     4 0.8   
#>  3      28054                  1     1 0.333 
#>  4      28054                  9     1 0.333 
#>  5      28054                 10     1 0.333 
#>  6      28056                  4     1 0.111 
#>  7      28056                  6     1 0.111 
#>  8      28056                  8     1 0.111 
#>  9      28056                  9     1 0.111 
#> 10      28056                 10     5 0.556 
#> 11      28057                  0     1 0.0556
#> 12      28057                  7     5 0.278 
#> 13      28057                  8     5 0.278 
#> 14      28057                  9     4 0.222 
#> 15      28057                 10     3 0.167 
#> 16      28058                  5     1 0.0667
#> 17      28058                  6     2 0.133 
#> 18      28058                  7     1 0.0667
#> 19      28058                  8     2 0.133 
#> 20      28058                 10     9 0.6

^{Created on 2022-04-13 by the reprex package (v0.2.1)}

jkdby · April 14, 2022, 4:03pm

This is exactly what I had in mind, thank you very much!

Do you know the best way to add a "0" proportion for every response between 0 and 10, even if the participant never used that response?

So in the example above, for participant 28053, ideally I would like response 0: Frac = 0.2; response 1: Frac = 0; response 2: Frac = 0; ... response 10: Frac = 0.8.

FJCC · April 14, 2022, 5:51pm

The complete() function from tidyr can do that.

# Sample survey data from the question: one row per completed survey, stored
# as a tibble that is already grouped by participant (subject_id).
taste <- local({
  ids <- c(28053, 28054, 28056, 28057, 28058)

  # Ratings in row order; each line holds one participant's surveys.
  ratings <- c(
    10, 0, 10, 10, 10,                                          # 28053
    9, 10, 1,                                                   # 28054
    9, 10, 10, 6, 10, 8, 10, 10, 4,                             # 28056
    8, 7, 8, 7, 8, 9, 9, 9, 9, 7, 10, 10, 0, 7, 8, 7, 10, 8,    # 28057
    5, 10, 8, 8, 10, 10, 10, 10, 6, 7, 10, 10, 6, 10, 10        # 28058
  )

  # Grouping metadata: which rows belong to which participant.
  group_rows <- structure(
    list(1:5, 6:8, 9:17, 18:35, 36:50),
    ptype = integer(0),
    class = c("vctrs_list_of", "vctrs_vctr", "list")
  )
  group_data <- structure(
    list(subject_id = ids, .rows = group_rows),
    class = c("tbl_df", "tbl", "data.frame"),
    row.names = c(NA, -5L),
    .drop = TRUE
  )

  structure(
    list(
      # Participants completed 5, 3, 9, 18 and 15 surveys respectively.
      subject_id = rep(ids, times = c(5, 3, 9, 18, 15)),
      SUGARY_DRINK_TASTE = ratings
    ),
    class = c("grouped_df", "tbl_df", "tbl", "data.frame"),
    row.names = c(NA, -50L),
    groups = group_data
  )
})

library(dplyr)
library(tidyr)
# Make the rating a factor that carries every possible level (0 to 10), so
# ratings a participant never used are still known values.
taste <- taste %>%
  mutate(SUGARY_DRINK_TASTE = factor(SUGARY_DRINK_TASTE, levels = 0:10))

# Count each participant/rating pair that occurs, drop the grouping, then add
# a row with n = 0 for every participant/rating pair that never occurs.
COUNTS <- taste %>%
  group_by(subject_id, SUGARY_DRINK_TASTE) %>%
  count() %>%
  ungroup() %>%
  complete(subject_id, SUGARY_DRINK_TASTE, fill = list(n = 0))

# Within each participant, convert the counts to proportions of their surveys.
Frac <- COUNTS %>%
  group_by(subject_id) %>%
  mutate(Frac = n / sum(n))

Frac
Frac
# A tibble: 55 × 4
# Groups:   subject_id [5]
   subject_id SUGARY_DRINK_TASTE     n  Frac
        <dbl> <fct>              <int> <dbl>
 1      28053 0                      1   0.2
 2      28053 1                      0   0  
 3      28053 2                      0   0  
 4      28053 3                      0   0  
 5      28053 4                      0   0  
 6      28053 5                      0   0  
 7      28053 6                      0   0  
 8      28053 7                      0   0  
 9      28053 8                      0   0  
10      28053 9                      0   0

system · May 5, 2022, 5:51pm

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.