Hi Juan,
I used the group_by and summarize functions from the tidyverse package to calculate the earliest dates and then joined them to the table with missing data.
install.packages("tidyverse")
library(tidyverse)
# Create the first table
id1 <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
x <- c("A", "C", "C", "C", "B", "C", "B", "A", "A", "A")
y <- c(1, 3, 3, 2, 2, 1, 3, 2, 3, 1)
z <- c(NA, NA, NA, NA, "2019-08-04", "2019-09-18", "2019-12-17", "2019-11-02", "2020-03-16", "2020-01-31")
table1 <- data.frame(id = id1, x, y, z = as.Date(z))
# Create the second table
id2 <- c(1, 1, 1, 2, 2, 2, 3, 3, 4, 4)
date <- c("2012-09-25", "2012-03-26", "2012-11-12", "2012-05-04", "2012-02-24", "2012-05-30", "2012-02-15", "2012-02-15", "2012-03-13", "2012-05-18")
table2 <- data.frame(id = id2, date = as.Date(date))
# Calculate earliest dates
firstDates <-
table2 |>
group_by(id) |>
summarize(date = min(date))
# Replace NAs
table1 |>
left_join(firstDates, by = "id") |>
mutate(z = case_when(
is.na(z) ~ date,
TRUE ~ z
)) |>
select(-c(date))
Let me know if you have any questions about what I did.
Matt