How to generate new rows by performing arithmetic on multiple other rows

I have this tibble derived from {tidycensus}'s American Community Survey data.

tibble::tribble(
                ~tenure_type,                                    ~race_eth_type,              ~count,                   ~moe,
            "Owner occupied",                                             "ALL",                  76444810,                  367132,
           "Renter occupied",                                             "ALL",                  43285318,                  139467,
            "Owner occupied",     "BLACK OR AFRICAN AMERICAN ALONE HOUSEHOLDER",                   6131359,                   45617,
           "Renter occupied",     "BLACK OR AFRICAN AMERICAN ALONE HOUSEHOLDER",                   8552346,                   29687,
            "Owner occupied", "WHITE ALONE, NOT HISPANIC OR LATINO HOUSEHOLDER",                  58423846,                  226380,
           "Renter occupied", "WHITE ALONE, NOT HISPANIC OR LATINO HOUSEHOLDER",                  22993331,                   82321
           )

I want to group by tenure_type and generate rows for a new race_eth_type called "OTHER". For each tenure type, count will equal the count for "ALL" minus the counts for "BLACK..." and "WHITE..." (76444810 - 6131359 - 58423846 = 11889605, etc). moe would be generated using tidycensus::moe_sum.

Maybe my brain is just overly fatigued, but I can't figure out how to do this. :frowning:

suppressPackageStartupMessages({
  library(dplyr)
})
DF <- tibble::tribble(
  ~tenure_type, ~race_eth_type, ~count, ~moe,
  "Owner occupied", "ALL", 76444810, 367132,
  "Renter occupied", "ALL", 43285318, 139467,
  "Owner occupied", "BLACK OR AFRICAN AMERICAN ALONE HOUSEHOLDER", 6131359, 45617,
  "Renter occupied", "BLACK OR AFRICAN AMERICAN ALONE HOUSEHOLDER", 8552346, 29687,
  "Owner occupied", "WHITE ALONE, NOT HISPANIC OR LATINO HOUSEHOLDER", 58423846, 226380,
  "Renter occupied", "WHITE ALONE, NOT HISPANIC OR LATINO HOUSEHOLDER", 22993331, 82321
)

ALL <- DF[1:2,1:3]
BW  <- DF[3:6,1:3]

BW_OO <- sum(DF[c(3,5),1:3]$count)
BW_RT <- sum(DF[c(4,6),1:3]$count)
ALL_OO <- sum(DF[1,1:3]$count)
ALL_RT <- sum(DF[2,1:3]$count)
OTH_OO <- data.frame(tenure_type = "Renter occupied",race_eth_type = "OTHER", count = ALL_OO - BW_OO, moe = NA)
OTH_RT <- data.frame(tenure_type = "Owner occupied",race_eth_type = "OTHER", count = ALL_RT - BW_RT, moe = NA)

bind_rows(DF,OTH_OO,OTH_RT)
#> # A tibble: 8 x 4
#>   tenure_type     race_eth_type                                     count    moe
#>   <chr>           <chr>                                             <dbl>  <dbl>
#> 1 Owner occupied  ALL                                              7.64e7 367132
#> 2 Renter occupied ALL                                              4.33e7 139467
#> 3 Owner occupied  BLACK OR AFRICAN AMERICAN ALONE HOUSEHOLDER      6.13e6  45617
#> 4 Renter occupied BLACK OR AFRICAN AMERICAN ALONE HOUSEHOLDER      8.55e6  29687
#> 5 Owner occupied  WHITE ALONE, NOT HISPANIC OR LATINO HOUSEHOLDER  5.84e7 226380
#> 6 Renter occupied WHITE ALONE, NOT HISPANIC OR LATINO HOUSEHOLDER  2.30e7  82321
#> 7 Renter occupied OTHER                                            1.19e7     NA
#> 8 Owner occupied  OTHER                                            1.17e7     NA

Created on 2021-01-02 by the reprex package (v0.3.0.9001)

1 Like

This here should obtain your desired result:

library(dplyr)
library(stringr)

dat <- tibble::tribble(
  ~tenure_type,                                    ~race_eth_type,              ~count,                   ~moe,
  "Owner occupied",                                             "ALL",                  76444810,                  367132,
  "Renter occupied",                                             "ALL",                  43285318,                  139467,
  "Owner occupied",     "BLACK OR AFRICAN AMERICAN ALONE HOUSEHOLDER",                   6131359,                   45617,
  "Renter occupied",     "BLACK OR AFRICAN AMERICAN ALONE HOUSEHOLDER",                   8552346,                   29687,
  "Owner occupied", "WHITE ALONE, NOT HISPANIC OR LATINO HOUSEHOLDER",                  58423846,                  226380,
  "Renter occupied", "WHITE ALONE, NOT HISPANIC OR LATINO HOUSEHOLDER",                  22993331,                   82321
)

dat %>%
  mutate(race = case_when(
    race_eth_type == "ALL" ~ "A",
    str_detect(race_eth_type, "BLACK") ~ "B",
    str_detect(race_eth_type, "WHITE") ~ "W"
  )) %>%
  group_by(tenure_type) %>%
  summarize(new_count = count[race == "A"] - count[race == "B"] - count[race == "W"])

`summarise()` ungrouping output (override with `.groups` argument)
# A tibble: 2 x 2
  tenure_type     new_count
  <chr>               <dbl>
1 Owner occupied   11889605
2 Renter occupied  11739641
1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.