I want the left join to match only with fields that are not NA in line_df.
I.e. row 2 in line_df should get an ID = 1 since the text "vb" in text 3 should be discarded
id_df <- tribble(~ ID, ~ Text_1, ~ Text_2, ~ Text_3,
1, "apple", "orange", NA,
2, NA, "banana", NA,
3, NA, NA, "pear",
4, "citrus", NA, "grape",
5, NA, "carrot", NA)
line_df <- tribble(~ Line, ~ Text_1, ~ Text_2, ~ Text_3,
1, "apple", "xx", NA,
2, "apple", "orange", "vb",
3, NA, "carrot", NA,
4, "citrus", NA, "grape",
5, "ee", NA, "grape",
6, NA, NA, "pear",
7, "dd", NA, "pear",
8, NA, "carrot", NA)
left_join(line_df, id_df, by = c("Text_1", "Text_2", "Text_3"))
I want the left join to produce the same output line_df as you got for each line_df
line_df <- tribble(~ Line, ~ Text_1, ~ Text_2, ~ Text_3, ~ ID,
1, "apple", "xx", NA,NA,
2, "apple", "orange", "vb",1,
3, NA, "carrot", NA,5,
4, "citrus", NA, "grape",4,
5, "ee", NA, "grape",NA,
6, NA, NA, "pear",3,
7, "dd", NA, "pear",3,
8, NA, "carrot", NA,5)
Hence, the left join shall only consider values in id_df that are not NA when performing the left join