R data.table conditional join

I am trying to conditionally join two data.tables:

ex <- structure(list(person_id = c("79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", 
"79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", "79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", 
"79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", "79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", 
"79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", "79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", 
"79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", "8b6ea77b-e694-48fb-a9e9-ca8bf1accc65", 
"8b6ea77b-e694-48fb-a9e9-ca8bf1accc65", "8b6ea77b-e694-48fb-a9e9-ca8bf1accc65", 
"8b6ea77b-e694-48fb-a9e9-ca8bf1accc65"), prs_nat_key = c("8240588160001", 
"8240588160001", "8240588160001", "8240588160001", "8240588160001", 
"8240588160001", "8240588160001", "8240588160001", "106705689", 
"106705689", "106705689", "106705689"), serv_from_dt = structure(c(18262, 
18262, 18262, 18262, 18275, 18275, 18275, 18275, 18278, 18278, 
18278, 18278), class = "Date"), serv_to_dt = structure(c(18262, 
18262, 18262, 18265, 18275, 18278, 18278, 18278, 18282, 18282, 
18299, 18299), class = "Date"), new_pos = c("IP", "IP", "IP", 
"IP", "IP", "IP", "IP", "IP", "IP", "IP", "IP", "IP"), days_diff = c(0, 
0, 0, 3, 0, 3, 3, 3, 4, 4, 21, 21)), row.names = c(NA, -12L), class = c("data.table", 
"data.frame"))

with

date_period <- structure(list(prs_nat_key = c("106705689", "8240588160001", 
"8240588160001"), unique_start = structure(c(18278, 18262, 18275
), class = "Date"), unique_end = structure(c(18299, 18265, 18278
), class = "Date")), row.names = c(NA, -3L), class = c("data.table", 
"data.frame"))

I have this code to conditionally join data.tables :

setDT(ex)
setDT(date_period)
setkey(date_period, prs_nat_key)
setkey(ex, prs_nat_key)
ex[, c("end_date") := # Assign the below result to new columns in dtgrouped2
     date_period[ex, # join
                 .(unique_end), # get the column you need
                 on = .(unique_start <= serv_from_dt,
                        unique_end >= serv_to_dt,
                        prs_nat_key = prs_nat_key)]]

I want the join to be: if the rows match by prs_nat_key, and ex$serv_to_dt <= date_period$unique_end & ex$serv_from_dt >= date_period$unique_start, then join.

The output I'm getting is:

output <- structure(list(person_id = c("79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", 
"79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", "79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", 
"79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", "79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", 
"79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", "79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", 
"79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", "8b6ea77b-e694-48fb-a9e9-ca8bf1accc65", 
"8b6ea77b-e694-48fb-a9e9-ca8bf1accc65", "8b6ea77b-e694-48fb-a9e9-ca8bf1accc65", 
"8b6ea77b-e694-48fb-a9e9-ca8bf1accc65"), prs_nat_key = c("8240588160001", 
"8240588160001", "8240588160001", "8240588160001", "8240588160001", 
"8240588160001", "8240588160001", "8240588160001", "106705689", 
"106705689", "106705689", "106705689"), serv_from_dt = structure(c(18262, 
18262, 18262, 18262, 18275, 18275, 18275, 18275, 18278, 18278, 
18278, 18278), class = "Date"), serv_to_dt = structure(c(18262, 
18262, 18262, 18265, 18275, 18278, 18278, 18278, 18282, 18282, 
18299, 18299), class = "Date"), new_pos = c("IP", "IP", "IP", 
"IP", "IP", "IP", "IP", "IP", "IP", "IP", "IP", "IP"), days_diff = c(0, 
0, 0, 3, 0, 3, 3, 3, 4, 4, 21, 21), start_date = structure(c(18262, 
18262, 18262, 18262, 18275, 18275, 18275, 18275, 18278, 18278, 
18278, 18278), class = "Date"), end_date = structure(c(18262, 
18262, 18262, 18265, 18275, 18278, 18278, 18278, 18282, 18282, 
18299, 18299), class = "Date")), row.names = c(NA, -12L), class = c("data.table", 
"data.frame"))

but what I'm actually looking for is:

want <- structure(list(person_id = c("79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", 
"79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", "79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", 
"79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", "79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", 
"79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", "79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", 
"79d8c6ee-62f4-4a09-a31e-a3d1a48d79a8", "8b6ea77b-e694-48fb-a9e9-ca8bf1accc65", 
"8b6ea77b-e694-48fb-a9e9-ca8bf1accc65", "8b6ea77b-e694-48fb-a9e9-ca8bf1accc65", 
"8b6ea77b-e694-48fb-a9e9-ca8bf1accc65"), prs_nat_key = c("8240588160001", 
"8240588160001", "8240588160001", "8240588160001", "8240588160001", 
"8240588160001", "8240588160001", "8240588160001", "106705689", 
"106705689", "106705689", "106705689"), serv_from_dt = structure(c(18262, 
18262, 18262, 18262, 18275, 18275, 18275, 18275, 18278, 18278, 
18278, 18278), class = "Date"), serv_to_dt = structure(c(18262, 
18262, 18262, 18265, 18275, 18278, 18278, 18278, 18282, 18282, 
18299, 18299), class = "Date"), new_pos = c("IP", "IP", "IP", 
"IP", "IP", "IP", "IP", "IP", "IP", "IP", "IP", "IP"), days_diff = c(0, 
0, 0, 3, 0, 3, 3, 3, 4, 4, 21, 21), start_date = structure(c(18262, 
18262, 18262, 18262, 18275, 18275, 18275, 18275, 18278, 18278, 
18278, 18278), class = "Date"), end_date = structure(c(18265, 
18265, 18265, 18265, 18278, 18278, 18278, 18278, 18299, 18299, 
18299, 18299), class = "Date")), row.names = c(NA, -12L), class = c("data.table", 
"data.frame"))

is there any way I can adjust my code to get what I'm looking for? Thank you!

I get a different output to yours running you code example, but regardless I think such a question is best asked on stackoverflow. They have quite a few data.table experts who actively help.

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.