Average and count of values for subgroups

Hi R masters,
I have written following code to calculate average scores for each question and for each gender (and total):

response.data <- data.frame(
  A_Top2 = c(NA, 0, 100, 0, 0, 0, 0, 0, 0, 0, 100, 100, 0, NA),
  B_Top2 = c(0, 0, 100, 100, 0, 0, 0, 0, 100, 0, 0, 0, 100, 100),
  C_Yes = c(NA, 0, 0, 100, 100, 0, 0, NA, 0, 0, 0, 0, 100,  100),
  D_Yes = c(0, 0, 0, 100, 0, 100, 0, 0, 100, 0,  0, 0, 100, NA),
  URN = as.factor(c("aaa", "bbb", "ccc", "ddd", "eee", "fff", "ggg",
                    "hhh", "iii", "jjj", "kkk", "lll", "mmm", "nnn")),
  Gender = as.factor(c("Male", "Male", "Male", "Male", "Male", "Male",
                       "Female", "Female", "Female", "Female", "Female",
                       "Female", "Male", "Male"))
)
response.data


response.data %>%
  bind_rows(mutate(.data = .,
                   Gender = "Total")) %>%
  group_by(Gender) %>%
  summarise_at(.vars = vars(ends_with(match = "Top2"), ends_with(match = "Yes")),
               .funs = list(Aver = mean,
                            Count = ~n())) %>%
  print(width = Inf)

Unfortunately, results are incorrect. Some Averages are blank. Also, count should take into account valid responses for each question (Only Question B_Top2 is answered by all 14 respondents).

Do you know what I am doing wrong?

you just need to tweak your function list

.funs = list(Aver = ~mean(.,na.rm=TRUE),
             Count = ~sum(!is.na(.)))

Thank you very much. It is working on Character variables (like Gender). It doesn't on numeric ones. I've added Year

response.data <- data.frame(
  A_Top2 = c(NA, 0, 100, 0, 0, 0, 0, 0, 0, 0, 100, 100, 0, NA),
  B_Top2 = c(0, 0, 100, 100, 0, 0, 0, 0, 100, 0, 0, 0, 100, 100),
  C_Yes = c(NA, 0, 0, 100, 100, 0, 0, NA, 0, 0, 0, 0, 100,  100),
  D_Yes = c(0, 0, 0, 100, 0, 100, 0, 0, 100, 0,  0, 0, 100, NA),
  Year = c(2020, 2020, 2019, 2018, 2020, 2019,2020, 2020, 2019, 2018, 2020, 2019,NA, 2021),
  URN = as.factor(c("aaa", "bbb", "ccc", "ddd", "eee", "fff", "ggg",
                    "hhh", "iii", "jjj", "kkk", "lll", "mmm", "nnn")),
  Gender = as.factor(c("Male", "Male", "Male", "Male", "Male", "Male",
                       "Female", "Female", "Female", "Female", "Female",
                       "Female", "Male", "Male"))
)
result <- response.data %>%
  bind_rows(mutate(.data = .,
                   Year = "Total")) %>%
  group_by(Year) %>%
  summarise_at(.vars = vars(ends_with(match = "Top2"), ends_with(match = "Yes")),.funs = list(Aver = ~mean(.,na.rm=TRUE),
                                                                                              Count = ~sum(!is.na(.))))

result

but the function is not working.

I have this error

                        Error: Can't combine `..1$Year` <double> and `..2$Year` <character>.

Can you help please?

In response.data, Year is numeric and you're trying to add on rows where Year is character and you can't have a column with multiple types. One solution is to change Year to character before adding on the "Total" row like shown below

library(tidyverse)

response.data <- data.frame(
   A_Top2 = c(NA, 0, 100, 0, 0, 0, 0, 0, 0, 0, 100, 100, 0, NA),
   B_Top2 = c(0, 0, 100, 100, 0, 0, 0, 0, 100, 0, 0, 0, 100, 100),
   C_Yes = c(NA, 0, 0, 100, 100, 0, 0, NA, 0, 0, 0, 0, 100,  100),
   D_Yes = c(0, 0, 0, 100, 0, 100, 0, 0, 100, 0,  0, 0, 100, NA),
   Year = c(2020, 2020, 2019, 2018, 2020, 2019,2020, 2020, 2019, 2018, 2020, 2019,NA, 2021),
   URN = as.factor(c("aaa", "bbb", "ccc", "ddd", "eee", "fff", "ggg",
                     "hhh", "iii", "jjj", "kkk", "lll", "mmm", "nnn")),
   Gender = as.factor(c("Male", "Male", "Male", "Male", "Male", "Male",
                        "Female", "Female", "Female", "Female", "Female",
                        "Female", "Male", "Male"))
)


result2 <- response.data %>%
   mutate(Year=as.character(Year)) %>%
   bind_rows(mutate(.data = .,
                    Year = "Total")) %>%
   group_by(Year) %>%
   summarise_at(.vars = vars(ends_with(match = "Top2"), ends_with(match = "Yes")),.funs = list(Aver = ~mean(.,na.rm=TRUE),
                                                                                               Count = ~sum(!is.na(.))))

result2
#> # A tibble: 6 x 9
#>   Year  A_Top2_Aver B_Top2_Aver C_Yes_Aver D_Yes_Aver A_Top2_Count B_Top2_Count
#> * <chr>       <dbl>       <dbl>      <dbl>      <dbl>        <int>        <int>
#> 1 2018            0        50         50         50              2            2
#> 2 2019           50        50          0         50              4            4
#> 3 2020           20         0         25          0              5            6
#> 4 2021          NaN       100        100        NaN              0            1
#> 5 Total          25        35.7       33.3       30.8           12           14
#> 6 <NA>            0       100        100        100              1            1
#> # ... with 2 more variables: C_Yes_Count <int>, D_Yes_Count <int>

Created on 2021-03-12 by the reprex package (v1.0.0)

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.