How to generate a table of counts for multiple dependant columns grouped by two variables

Hello!

I am trying to generate a table using the data table (can be generated using the code below). I need to be able to group the data to show me the number of people in each state for each raceethk who have taken each test (for example: in alabama, how many people of each raceethk category have taken test P_u12CRV, have not taken test P_u12CRV, and NA. How many people of each raceethk category have taken test P_UTDHEP, have not taken test P_UTDHEP, and NA, and so on for each test all in one table). I know that tidyverse can be used through the groupby function to do generate a different table for each test column individually, but I am not sure how to generate one large table for all of the tests. I appreciate any and all help!

Here is the code to use to generate the data: structure(list(SEQNUMC = structure(c("152651", "12631", "261061", "102261", "269631", "247001", "262721", "217211", "191371", "170311", "152311", "110191", "162801", "54921", "104221", "48621"), label = "UNIQUE CHILD IDENTIFIER", class = c("labelled", "character")), PDAT = structure(c(1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L), .Label = c("CHILD HAS ADEQUATE PROVIDER DATA OR ZERO VACCINATIONS", "CHILD DOES NOT HAVE ADEQUATE PROVIDER DATA"), class = "factor"), RACEETHK = structure(c(1L, 2L, 1L, 4L, 1L, 1L, 2L, 2L, 2L, 2L, 4L, 4L, 1L, 2L, 1L, 2L), .Label = c("HISPANIC", "NON-HISPANIC WHITE ONLY", "NON-HISPANIC BLACK ONLY", "NON-HISPANIC OTHER + MULTIPLE RACE"), class = "factor"), STATE = structure(c(40L, 18L, 47L, 49L, 4L, 36L, 18L, 16L, 45L, 18L, 14L, 16L, 4L, 42L, 7L, 1L), .Label = c("ALABAMA", "DELAWARE", "DISTRICT OF COLUMBIA", "FLORIDA", "GEORGIA", "HAWAII", "IDAHO", "ILLINOIS", "INDIANA", "IOWA", "ALASKA", "KANSAS", "KENTUCKY", "LOUISIANA", "MAINE", "MARYLAND", "MASSACHUSETTS", "MICHIGAN", "MINNESOTA", "MISSISSIPPI", "MISSOURI", "MONTANA", "NEBRASKA", "NEVADA", "NEW HAMPSHIRE", "NEW JERSEY", "NEW MEXICO", "NEW YORK", "NORTH CAROLINA", "NORTH DAKOTA", "OHIO", "ARIZONA", "OKLAHOMA", "OREGON", "PENNSYLVANIA", "RHODE ISLAND", "SOUTH CAROLINA", "SOUTH DAKOTA", "TENNESSEE", "TEXAS", "UTAH", "ARKANSAS", "VERMONT", "VIRGINIA", "WASHINGTON", "WEST VIRGINIA", "WISCONSIN", "WYOMING", "CALIFORNIA", "GUAM", "PUERTO RICO", "U.S. VIRGIN ISLANDS", "COLORADO", "CONNECTICUT"), class = "factor"), P_U12VRC = structure(c(2L, NA, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA, 2L, NA, 2L, NA), .Label = c("NOT UTD", "UTD"), class = "factor"), P_UTDHEP = structure(c(2L, NA, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA, 2L, NA, 2L, NA), .Label = c("NOT UTD", "UTD"), class = "factor"), P_UTDHEPA2 = structure(c(2L, NA, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, NA, 2L, NA), .Label = c("NOT UTD", "UTD"), class = "factor")), row.names = c(NA, 16L), class = "data.frame")

suppressPackageStartupMessages({
  library(dplyr)
})
dta <- structure(list(SEQNUMC = structure(c("152651", "12631", "261061", "102261", "269631", "247001", "262721", "217211", "191371", "170311", "152311", "110191", "162801", "54921", "104221", "48621"), label = "UNIQUE CHILD IDENTIFIER", class = c("labelled", "character")), PDAT = structure(c(1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 2L), .Label = c("CHILD HAS ADEQUATE PROVIDER DATA OR ZERO VACCINATIONS", "CHILD DOES NOT HAVE ADEQUATE PROVIDER DATA"), class = "factor"), RACEETHK = structure(c(1L, 2L, 1L, 4L, 1L, 1L, 2L, 2L, 2L, 2L, 4L, 4L, 1L, 2L, 1L, 2L), .Label = c("HISPANIC", "NON-HISPANIC WHITE ONLY", "NON-HISPANIC BLACK ONLY", "NON-HISPANIC OTHER + MULTIPLE RACE"), class = "factor"), STATE = structure(c(40L, 18L, 47L, 49L, 4L, 36L, 18L, 16L, 45L, 18L, 14L, 16L, 4L, 42L, 7L, 1L), .Label = c("ALABAMA", "DELAWARE", "DISTRICT OF COLUMBIA", "FLORIDA", "GEORGIA", "HAWAII", "IDAHO", "ILLINOIS", "INDIANA", "IOWA", "ALASKA", "KANSAS", "KENTUCKY", "LOUISIANA", "MAINE", "MARYLAND", "MASSACHUSETTS", "MICHIGAN", "MINNESOTA", "MISSISSIPPI", "MISSOURI", "MONTANA", "NEBRASKA", "NEVADA", "NEW HAMPSHIRE", "NEW JERSEY", "NEW MEXICO", "NEW YORK", "NORTH CAROLINA", "NORTH DAKOTA", "OHIO", "ARIZONA", "OKLAHOMA", "OREGON", "PENNSYLVANIA", "RHODE ISLAND", "SOUTH CAROLINA", "SOUTH DAKOTA", "TENNESSEE", "TEXAS", "UTAH", "ARKANSAS", "VERMONT", "VIRGINIA", "WASHINGTON", "WEST VIRGINIA", "WISCONSIN", "WYOMING", "CALIFORNIA", "GUAM", "PUERTO RICO", "U.S. VIRGIN ISLANDS", "COLORADO", "CONNECTICUT"), class = "factor"), P_U12VRC = structure(c(2L, NA, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA, 2L, NA, 2L, NA), .Label = c("NOT UTD", "UTD"), class = "factor"), P_UTDHEP = structure(c(2L, NA, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA, 2L, NA, 2L, NA), .Label = c("NOT UTD", "UTD"), class = "factor"), P_UTDHEPA2 = structure(c(2L, NA, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, NA, 2L, NA), .Label = c("NOT UTD", "UTD"), class = "factor")), row.names = c(NA, 16L), class = "data.frame")

dta
#>    SEQNUMC                                                  PDAT
#> 1   152651 CHILD HAS ADEQUATE PROVIDER DATA OR ZERO VACCINATIONS
#> 2    12631            CHILD DOES NOT HAVE ADEQUATE PROVIDER DATA
#> 3   261061 CHILD HAS ADEQUATE PROVIDER DATA OR ZERO VACCINATIONS
#> 4   102261            CHILD DOES NOT HAVE ADEQUATE PROVIDER DATA
#> 5   269631            CHILD DOES NOT HAVE ADEQUATE PROVIDER DATA
#> 6   247001            CHILD DOES NOT HAVE ADEQUATE PROVIDER DATA
#> 7   262721            CHILD DOES NOT HAVE ADEQUATE PROVIDER DATA
#> 8   217211            CHILD DOES NOT HAVE ADEQUATE PROVIDER DATA
#> 9   191371            CHILD DOES NOT HAVE ADEQUATE PROVIDER DATA
#> 10  170311            CHILD DOES NOT HAVE ADEQUATE PROVIDER DATA
#> 11  152311            CHILD DOES NOT HAVE ADEQUATE PROVIDER DATA
#> 12  110191            CHILD DOES NOT HAVE ADEQUATE PROVIDER DATA
#> 13  162801 CHILD HAS ADEQUATE PROVIDER DATA OR ZERO VACCINATIONS
#> 14   54921            CHILD DOES NOT HAVE ADEQUATE PROVIDER DATA
#> 15  104221 CHILD HAS ADEQUATE PROVIDER DATA OR ZERO VACCINATIONS
#> 16   48621            CHILD DOES NOT HAVE ADEQUATE PROVIDER DATA
#>                              RACEETHK        STATE P_U12VRC P_UTDHEP P_UTDHEPA2
#> 1                            HISPANIC        TEXAS      UTD      UTD        UTD
#> 2             NON-HISPANIC WHITE ONLY     MICHIGAN     <NA>     <NA>       <NA>
#> 3                            HISPANIC    WISCONSIN  NOT UTD  NOT UTD    NOT UTD
#> 4  NON-HISPANIC OTHER + MULTIPLE RACE   CALIFORNIA     <NA>     <NA>       <NA>
#> 5                            HISPANIC      FLORIDA     <NA>     <NA>       <NA>
#> 6                            HISPANIC RHODE ISLAND     <NA>     <NA>       <NA>
#> 7             NON-HISPANIC WHITE ONLY     MICHIGAN     <NA>     <NA>       <NA>
#> 8             NON-HISPANIC WHITE ONLY     MARYLAND     <NA>     <NA>       <NA>
#> 9             NON-HISPANIC WHITE ONLY   WASHINGTON     <NA>     <NA>       <NA>
#> 10            NON-HISPANIC WHITE ONLY     MICHIGAN     <NA>     <NA>       <NA>
#> 11 NON-HISPANIC OTHER + MULTIPLE RACE    LOUISIANA     <NA>     <NA>       <NA>
#> 12 NON-HISPANIC OTHER + MULTIPLE RACE     MARYLAND     <NA>     <NA>       <NA>
#> 13                           HISPANIC      FLORIDA      UTD      UTD    NOT UTD
#> 14            NON-HISPANIC WHITE ONLY     ARKANSAS     <NA>     <NA>       <NA>
#> 15                           HISPANIC        IDAHO      UTD      UTD        UTD
#> 16            NON-HISPANIC WHITE ONLY      ALABAMA     <NA>     <NA>       <NA>

dta %>% group_by(STATE,RACEETHK) %>% count()
#> # A tibble: 13 x 3
#> # Groups:   STATE, RACEETHK [13]
#>    STATE        RACEETHK                               n
#>    <fct>        <fct>                              <int>
#>  1 ALABAMA      NON-HISPANIC WHITE ONLY                1
#>  2 FLORIDA      HISPANIC                               2
#>  3 IDAHO        HISPANIC                               1
#>  4 LOUISIANA    NON-HISPANIC OTHER + MULTIPLE RACE     1
#>  5 MARYLAND     NON-HISPANIC WHITE ONLY                1
#>  6 MARYLAND     NON-HISPANIC OTHER + MULTIPLE RACE     1
#>  7 MICHIGAN     NON-HISPANIC WHITE ONLY                3
#>  8 RHODE ISLAND HISPANIC                               1
#>  9 TEXAS        HISPANIC                               1
#> 10 ARKANSAS     NON-HISPANIC WHITE ONLY                1
#> 11 WASHINGTON   NON-HISPANIC WHITE ONLY                1
#> 12 WISCONSIN    HISPANIC                               1
#> 13 CALIFORNIA   NON-HISPANIC OTHER + MULTIPLE RACE     1

# won't work with factors
# dta %>% group_by(STATE,RACEETHK) %>% summarise(P_U12VRC = sum(P_U12VRC))

# defactor

dta %>% mutate(P_U12VRC = as.numeric(P_U12VRC)) -> dta2

dta2 %>% group_by(STATE,RACEETHK) %>% summarise(P_U12VRC = sum(P_U12VRC))
#> `summarise()` has grouped output by 'STATE'. You can override using the `.groups` argument.
#> # A tibble: 13 x 3
#> # Groups:   STATE [12]
#>    STATE        RACEETHK                           P_U12VRC
#>    <fct>        <fct>                                 <dbl>
#>  1 ALABAMA      NON-HISPANIC WHITE ONLY                  NA
#>  2 FLORIDA      HISPANIC                                 NA
#>  3 IDAHO        HISPANIC                                  2
#>  4 LOUISIANA    NON-HISPANIC OTHER + MULTIPLE RACE       NA
#>  5 MARYLAND     NON-HISPANIC WHITE ONLY                  NA
#>  6 MARYLAND     NON-HISPANIC OTHER + MULTIPLE RACE       NA
#>  7 MICHIGAN     NON-HISPANIC WHITE ONLY                  NA
#>  8 RHODE ISLAND HISPANIC                                 NA
#>  9 TEXAS        HISPANIC                                  2
#> 10 ARKANSAS     NON-HISPANIC WHITE ONLY                  NA
#> 11 WASHINGTON   NON-HISPANIC WHITE ONLY                  NA
#> 12 WISCONSIN    HISPANIC                                  1
#> 13 CALIFORNIA   NON-HISPANIC OTHER + MULTIPLE RACE       NA

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.