Use dplyr to do grouped t-tests and get number of observations simultaneously

tbradley · February 11, 2019, 5:29pm

You can use list-columns via group_by + nest to do it like this:

library('tidyverse')
library('broom')
# Reproducible toy data: 100 standard-normal values, each randomly assigned
# to one of five categories and to group "A" or "B".
set.seed(354654)
d <- tibble(
  value = rnorm(100),
  category = sample(1:5, size = 100, replace = TRUE),
  group = sample(c("A", "B"), size = 100, replace = TRUE)
)
# Sort rows by category.
d <- arrange(d, category)

# For every category, run a two-sample t-test (t.test() defaults) of `value`
# between groups A and B, and report both group sizes in the same row.
d %>%
  group_by(category, group) %>%
  # One row per (category, group); the observations go into list-column `data`.
  nest() %>%
  # Current tidyr keeps the grouping after nest(); drop it before reshaping.
  ungroup() %>%
  # One row per category, with list-columns `A` and `B` holding each group's
  # data. pivot_wider() replaces the superseded spread().
  pivot_wider(names_from = group, values_from = data) %>%
  mutate(
    # Must be computed first: A and B are overwritten with counts just below.
    t_test = map2(A, B, ~ tidy(t.test(.x$value, .y$value))),
    # map_int() yields plain integer columns instead of list-columns.
    A = map_int(A, nrow),
    B = map_int(B, nrow)
  ) %>%
  # Name the column to expand; a bare unnest() is deprecated in tidyr >= 1.0.
  unnest(cols = t_test)
#> # A tibble: 5 x 13
#>   category     A     B estimate estimate1 estimate2 statistic p.value
#>      <int> <int> <int>    <dbl>     <dbl>     <dbl>     <dbl>   <dbl>
#> 1        1     9    13    0.296    0.290   -0.00634     0.889   0.385
#> 2        2     5     7   -0.698   -0.668    0.0299     -1.18    0.298
#> 3        3    14    10    0.359    0.388    0.0292      0.801   0.435
#> 4        4     8    13    0.387    0.0910  -0.296       0.791   0.442
#> 5        5     7    14    0.271    0.232   -0.0388      0.713   0.485
#> # ... with 5 more variables: parameter <dbl>, conf.low <dbl>,
#> #   conf.high <dbl>, method <chr>, alternative <chr>

Created on 2019-02-11 by the reprex package (v0.2.0).