Hi @MyKo101 ,
Nice MWE. I'm sure there's a simpler way, but two joins of ref1 and ref2 with df and then finding the non-NA values via coalesce works.
library("dplyr")
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library("data.table")
#>
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#>
#> between, first, last
ref1 <- data.table(
V1 = c("a","a","a","b","b","b","e","e"),
V2 = c("1","2","3","4","5","6","10","11"),
x = c(43,66,12,19,92,30,44,56)
)
ref2 <- data.table(
V1 = c("c","d"),
x = c(100,200)
)
df <- data.table(
V1 = c("a","b","c","d","f"),
V2 = c("2","4","7","9","14")
)
df1 <- ref1[df, on = c("V1", "V2")]
df2 <- ref2[df, on = c("V1")]
(df3 <- df1[df2, on = c("V1", "V2")] %>%
mutate(x = coalesce(x, i.x)) %>%
select(V1, V2, x))
#> V1 V2 x
#> 1: a 2 66
#> 2: b 4 19
#> 3: c 7 100
#> 4: d 9 200
#> 5: f 14 NA
target <- data.table(
V1 = c("a","b","c","d","f"),
V2 = c("2","4","7","9","14"),
x = c(66,19,100,200,NA)
)
identical(df3, target)
#> [1] TRUE
Created on 2021-09-29 by the reprex package (v2.0.1)