My solution:
- Join the data against itself by household, so that each record is matched against all others in the same household.
- Ignore matches of records against themselves.
- See if any of the matches overlap.
- Collapse those down by unique interval (household by person by start time), flagging any interval that overlaps at least one other interval.
Writing this as data.table code:
library(data.table)
mydata <- data.table(
household = c(1, 1, 1, 1, 1, 2, 2),
person = c(1, 2, 3, 4, 1, 1, 2),
start_time = c(
"07:45:00", "09:45:00", "22:45:00", "08:45:00",
"23:50:00", "07:45:00", "16:45:00"
),
end_time = c(
"21:45:00", "17:45:00", "23:45:00", "01:45:00",
"24:00:00", "21:45:00", "22:45:00"
)
)
# Step 1: join dataset to itself
mydata2 <- mydata[
mydata,
on = "household",
allow.cartesian = TRUE
][
# Step 2: ignore self-self pairs
person != i.person,
{
# Step 3: overlap is when either start is between the other start-end range
pair_overlap <- between(.BY[["start_time"]], i.start_time, i.end_time) |
between(i.start_time, .BY[["start_time"]], .BY[["end_time"]])
# Step 4: collapse to a single value
list(overlap = any(pair_overlap))
},
by = list(household, person, start_time, end_time)
]
mydata2
# household person start_time end_time overlap
# 1: 1 2 09:45:00 17:45:00 TRUE
# 2: 1 3 22:45:00 23:45:00 FALSE
# 3: 1 4 08:45:00 01:45:00 TRUE
# 4: 1 1 07:45:00 21:45:00 TRUE
# 5: 1 1 23:50:00 24:00:00 FALSE
# 6: 2 2 16:45:00 22:45:00 TRUE
# 7: 2 1 07:45:00 21:45:00 TRUE
Without the comments, the code's fairly compact. If you're used to basic data.table syntax, it's also easy to read.
mydata2 <- mydata[
mydata,
on = "household",
allow.cartesian = TRUE
][
person != i.person,
list(overlap = any(
between(.BY[["start_time"]], i.start_time, i.end_time) |
between(i.start_time, .BY[["start_time"]], .BY[["end_time"]])
)),
by = list(household, person, start_time, end_time)
]