Calculate expected frequencies for chi squared test of independence

Does anyone know a tidyverse way to calculate expected frequencies for chi squared test of independence on the following data?

library(tidyverse)

# Observed counts in long form: one row per sex x voting-preference cell.
sex <- rep(c("F", "M"), each = 3)
voting <- rep(c("D", "I", "R"), times = 2)
n <- c(200, 150, 50, 250, 300, 50)

# Reshape to a sex-by-voting contingency table (one column per voting
# category), then append a totals row and a totals column.
data <- janitor::adorn_totals(
  pivot_wider(tibble(sex, voting, n), names_from = voting, values_from = n),
  where = c("row", "col")
)

I'm aware you can calculate the test statistic and p-value using janitor::chisq.test(), but I wanted to see if I could calculate the expected frequencies the long way.

Maybe we can work it out this way:

library(tidyverse)
library(janitor)
#> 
#> Attaching package: 'janitor'
#> The following objects are masked from 'package:stats':
#> 
#>     chisq.test, fisher.test

# Observed counts in long form: one row per sex x voting cell.
sex <- c("F", "F", "F", "M", "M", "M")
voting <- c("D", "I", "R", "D", "I", "R")
n <- c(200, 150, 50, 250, 300, 50)

# Expected frequency under independence for each cell:
#   (total for that sex) * (total for that voting category) / grand total
data <- tibble(sex, voting, n) %>%
  pivot_wider(names_from = voting, values_from = n) %>%
  # Adds a `Total` column holding the total count for each sex.
  janitor::adorn_totals("col") %>%
  pivot_longer(cols = c(D, I, R)) %>%
  group_by(name) %>%
  # Despite its name, `rowsum` is the total for each voting category (`name`).
  mutate(rowsum = sum(value)) %>%
  # Drop the grouping before the last step: while still grouped, sum(value)
  # would be the per-category total again (equal to `rowsum`), and the
  # result would collapse to `Total` instead of using the grand total.
  ungroup() %>%
  mutate(expectedfrequency = Total * rowsum / sum(value))
data
#> # A tibble: 6 x 6
#>   sex   Total name  value rowsum expectedfrequency
#>   <chr> <dbl> <chr> <dbl>  <dbl>             <dbl>
#> 1 F       400 D       200    450               180
#> 2 F       400 I       150    450               180
#> 3 F       400 R        50    100                40
#> 4 M       600 D       250    450               270
#> 5 M       600 I       300    450               270
#> 6 M       600 R        50    100                60

Thanks @gtmbini

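The key is an ungroup() before the final mutate(), so that sum() runs over the whole table rather than within each group; with that, it is the correct answer.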

# Observed counts in long form: one row per cat1 x cat2 cell.
cat1 <- rep(c("M", "F"), each = 3)
cat2 <- rep(c("D", "I", "R"), times = 2)
n <- c(200, 150, 50, 250, 300, 50)

tibble(cat1 = cat1, cat2 = cat2, n = n) %>%
  # Go wide so the total for each cat1 level is a sum across columns.
  pivot_wider(names_from = cat2, values_from = n) %>%
  mutate(rowsum = D + I + R) %>%
  # Back to long form so the total for each cat2 level is a grouped sum.
  pivot_longer(cols = c(D, I, R), names_to = "cat2", values_to = "n") %>%
  group_by(cat2) %>%
  mutate(colsum = sum(n)) %>%
  # Ungroup so that sum(n) below is the grand total, not a per-cat2 total.
  ungroup() %>%
  # Expected count under independence: row total * column total / grand total.
  mutate(expected = rowsum * colsum / sum(n)) %>%
  select(cat1, cat2, n, rowsum, colsum, expected)

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.