Thanks for asking for a clarification.
I have data (Numerator and Denominator) by year and by various grouping variables.
First, I need to produce three-year running totals for the numerator and for the denominator based on various groupings. Once I have the three-year totals, I need to join the two tables. Then I perform various computations with basic_compute. The function basic_compute has two parameters: numerator and denominator.
The issue is that multiple groupings are possible. I want one function to handle all possible groupings, as well as the possibility that the numerator and denominator data are either in the same data frame or in different data frames.
After looking at Chapter 17 of Advanced R, I came up with the following code, which assumes that both the numerator data and the denominator data are in the same data frame (but that is definitely not always the case).
[17 Big picture | Advanced R](https://adv-r.hadley.nz/meta-big-picture.html)
The following code is a partial solution, but is not general enough.
library(purrr)
library(dplyr)
library(tidyr)
library(slider) # for grouping consecutive years
# The sample data ----
# Full grid of year x race/ethnicity x city x note (6 * 3 * 3 * 3 = 162 rows).
set.seed(2021)
EVENT_YEAR <- 2010:2015
RE <- c('white', 'black', 'Asian')
City <- c('Oakland', 'San Francisco', 'San Jose')
Note <- 1:3

# Numerator table. It also carries Denominator so that the single-data-frame
# case can be tested.
demoDF_N <- expand.grid(EVENT_YEAR = EVENT_YEAR, RE = RE, City = City, Note = Note)
# Size the draws from the grid instead of hard-coding 162.
demoDF_N$Numerator <- sample(3:10, nrow(demoDF_N), replace = TRUE)
demoDF_N$EVENT_YEAR <- as.factor(demoDF_N$EVENT_YEAR)
demoDF_N$RE <- as.factor(demoDF_N$RE)
demoDF_N$City <- as.factor(demoDF_N$City)
demoDF_N$Note <- as.factor(demoDF_N$Note)
class(demoDF_N$Note)
demoDF_N$Denominator <- sample(90:120, nrow(demoDF_N), replace = TRUE)

# Denominator table for the separate-data-frame case. It is derived from
# demoDF_N so the key columns have identical (factor) types and the
# Denominator values match; previously demoDF_D had no Denominator column
# and integer keys, so it could not be joined to demoDF_N.
demoDF_D <- demoDF_N[, c('EVENT_YEAR', 'RE', 'City', 'Note', 'Denominator')]
# Function calls
# NOTE(review): func_3 is not defined in this excerpt; presumably it takes the
# numerator table, the denominator table, and then the unquoted grouping
# columns -- confirm against its definition.
res1 <- func_3(demoDF_N, demoDF_D, EVENT_YEAR)
res2 <- func_3(demoDF_N, demoDF_D, EVENT_YEAR, RE)
res3 <- func_3(demoDF_N, demoDF_D, EVENT_YEAR, City)
res4 <- func_3(demoDF_N, demoDF_D, EVENT_YEAR, City, RE)
# Grand total of the numerator. The original referenced `demoDF`, which is
# never created; the numerator table is `demoDF_N`.
sum(demoDF_N$Numerator)
# Three-year running totals of Numerator and Denominator within one grouping
# variable, joined into a single table and passed to basic_compute().
#
# Args:
#   df1:  data frame with `Numerator` and `Denominator` columns plus the two
#         columns named by `grp1` and `grp2`.
#   grp1: unquoted name of the grouping column (e.g. Note).
#   grp2: unquoted name of the year column (e.g. EVENT_YEAR). Its values must
#         convert to integer via as.integer(as.character(.)), so factors of
#         years are fine.
#
# Returns:
#   One row per grp1 level and complete three-year window, with the year
#   column relabelled as a range (e.g. "2010-2012"), the summed `Numerator`
#   and `Denominator`, and the columns LCL, UCL, Rate, std_error and RSE
#   filled from basic_compute().
test_function <- function(df1, grp1, grp2) {
  grp1 <- enexpr(grp1)
  grp2 <- enexpr(grp2)
  # Column names as strings, needed for the join keys and dynamic renaming.
  grp1_name <- as.character(grp1)
  grp2_name <- as.character(grp2)

  # Centred three-year running total of one value column within grp1.
  running_total <- function(value_name) {
    df1 %>%
      # Work with integer years so ordering and the range label are numeric.
      mutate(!!grp2_name := as.integer(as.character(!!grp2))) %>%
      # Collapse to ONE row per (grp1, year) first. Without this the window
      # below would slide over individual rows rather than over years.
      group_by(!!grp1, !!grp2) %>%
      summarise(!!value_name := sum(.data[[value_name]]), .groups = "drop") %>%
      group_by(!!grp1) %>%
      arrange(!!grp2, .by_group = TRUE) %>%
      # Window = previous, current and next year. Assumes consecutive years
      # with no gaps inside each group.
      mutate(
        !!value_name := slider::slide_dbl(
          .data[[value_name]], sum,
          .before = 1, .after = 1, .complete = TRUE
        )
      ) %>%
      ungroup() %>%
      # Drop rows that are not based on three full years.
      filter(!is.na(.data[[value_name]])) %>%
      # Show the year as the range it covers, e.g. 2010-2012.
      mutate(
        !!grp2_name := paste0(.data[[grp2_name]] - 1L, "-", .data[[grp2_name]] + 1L)
      )
  }

  result_3_N <- running_total("Numerator")
  result_3_D <- running_total("Denominator")

  # Keys are now unique per table, so this is a one-to-one join.
  result3 <- result_3_N %>%
    right_join(result_3_D, by = c(grp1_name, grp2_name)) %>%
    replace_na(list(Denominator = 0, Numerator = 0))

  # NOTE(review): basic_compute() is defined elsewhere; presumably it returns
  # five columns in the order LCL, UCL, Rate, std_error, RSE -- confirm.
  result3[, c('LCL', 'UCL', 'Rate', 'std_error', 'RSE')] <-
    basic_compute(result3$Numerator, result3$Denominator)
  result3
}
# Example run: group by Note, using EVENT_YEAR as the year column, then
# inspect the first 30 rows of the result.
results3 <- test_function(df1 = demoDF_N, grp1 = Note, grp2 = EVENT_YEAR)
head(results3, 30)