These are the two steps in my code below:
- create a column which calculates the time difference between the diagnosis and inpatient start dates
- create two more columns (`case_1` and `case_2`) that test the conditions you explained in your question:
# Load packages
library(lubridate)
library(dplyr)
# Rebuild the example dataset (two Date columns parsed from m/d/y strings)
mydata <- data.frame(
  disease_diagnosis_date = mdy(
    c("12/20/20", "11/9/20", "5/7/20", "6/5/20", "9/3/20", "11/2/20")
  ),
  inpatient_start_date = mdy(
    c("12/30/20", "12/10/20", "5/7/20", "8/5/20", "12/8/20", "11/4/20")
  )
)

# Append the gap in days plus one logical flag per case.
# The result is printed only; `mydata` itself is left unchanged.
mydata %>%
  mutate(
    # Subtracting two Dates yields a difftime in days; spell the unit out
    time_difference = as.numeric(
      difftime(inpatient_start_date, disease_diagnosis_date, units = "days")
    ),
    # case_1: gap of more than 0 and at most 14 days (same-day excluded)
    case_1 = time_difference > 0 & time_difference <= 14,
    # case_2: gap of at least 0 and at most 14 days (same-day included)
    case_2 = time_difference >= 0 & time_difference <= 14
  )
disease_diagnosis_date inpatient_start_date time_difference case_1 case_2
1 2020-12-20 2020-12-30 10 TRUE TRUE
2 2020-11-09 2020-12-10 31 FALSE FALSE
3 2020-05-07 2020-05-07 0 FALSE TRUE
4 2020-06-05 2020-08-05 61 FALSE FALSE
5 2020-09-03 2020-12-08 96 FALSE FALSE
6 2020-11-02 2020-11-04 2 TRUE TRUE
Your original question was about filtering the data, so here is the code:
# Load packages
library(lubridate)
library(dplyr)
# Rebuild the example dataset, storing the gap in days as an extra column
mydata <- data.frame(
  disease_diagnosis_date = mdy(
    c("12/20/20", "11/9/20", "5/7/20", "6/5/20", "9/3/20", "11/2/20")
  ),
  inpatient_start_date = mdy(
    c("12/30/20", "12/10/20", "5/7/20", "8/5/20", "12/8/20", "11/4/20")
  )
) %>%
  mutate(
    # Subtracting two Dates yields a difftime in days; spell the unit out
    time_difference = as.numeric(
      difftime(inpatient_start_date, disease_diagnosis_date, units = "days")
    )
  )

# Case 1: keep rows whose gap is more than 0 and at most 14 days
mydata %>%
  filter(time_difference > 0 & time_difference <= 14)

# Case 2: keep rows whose gap is at least 0 and at most 14 days
mydata %>%
  filter(time_difference >= 0 & time_difference <= 14)
Hope this helps.