Merge in between dates

I have the following 2 dataframes

Data = data.frame(Name = c("Steve", "Sephiroth", "Hero"),
                  Date = c("03/01/2019", "01/09/2017", "06/07/2018"))

Map = data.frame(Name = c("Steve",  "Steve",    "Steve" ,"Sephiroth",   "Sephiroth" ,"Sephiroth",   "Hero", "Hero", "Hero"),
                 Class = c("1A",    "2B"    ,"3A",  "1E",   "2C",   "3E",   "1A",   "2D"    ,"3A"),
                 StartDate = c("01/03/2017" ,"01/06/2018"   ,"27/02/2019"   ,"01/05/2017"   ,"01/01/2018"   ,"01/01/2019"   ,"01/05/2017",  "01/01/2018",   "09/09/2019"),
                 EndDate = c("31/12/2017",  "31/10/2018",   "31/09/2019",   "08/06/2017",   "31/12/2018",   "31/12/2019",   "29/11/2017",   "29/08/2018",   "20/11/2019"))

I want the create a new dataframe by merging them according to their dates. If the "Date" falls in between "StartDate" and "EndDate", it should take the Class accordingly. For Example, Hero's date is 06/07/2018, so it should return 2D.

When merging, I do not want the StartDate and EndDate to be included in the dataframe. And if the Date does not fall in between any of the date range, it should return NA.
*I am not able to use library(sqldf) as my workspace does not have the package and no internet connection to download it

edit *

suppressPackageStartupMessages({
  library(dplyr)
  library(lubridate)
  
})

# Variable name Date conflicts with lubridate::Date
Data <- data.frame(
  Name = c("Steve", "Sephiroth", "Hero"),
  Dated = dmy(c("03/01/2019", "01/09/2017", "06/07/2018"))
)

#  EndDate[3] "31/09/2019" is a non-parseable date string that
#  results in warning only
Map <- data.frame(
  Name = c("Steve", "Steve", "Steve", "Sephiroth", "Sephiroth", "Sephiroth", "Hero", "Hero", "Hero"),
  Class = c("1A", "2B", "3A", "1E", "2C", "3E", "1A", "2D", "3A"),
  StartDate = dmy(c("01/03/2017", "01/06/2018", "27/02/2019", "01/05/2017", "01/01/2018", "01/01/2019", "01/05/2017", "01/01/2018", "09/09/2019")),
  EndDate = dmy(c("31/12/2017", "31/10/2018", "31/09/2019", "08/06/2017", "31/12/2018", "31/12/2019", "29/11/2017", "29/08/2018", "20/11/2019")))
#> Warning: 1 failed to parse.

Map %>% 
  mutate(Span = interval(StartDate,EndDate)) %>%
  select(Name,Class,Span) -> Map

Map %>% 
  group_by(Name) %>%
  inner_join(.,Data, by = "Name") %>%
  filter(Dated %within% Span) %>%
  select(Name,Class)
#> # A tibble: 1 x 2
#> # Groups:   Name [1]
#>   Name  Class
#>   <chr> <chr>
#> 1 Hero  2D

Map %>% 
  group_by(Name) %>%
  inner_join(.,Data, by = "Name") %>%
  mutate(Class = ifelse(
    Dated %within% Span,
    Class,
    NA)) %>% 
  ungroup() %>%
  select(Name,Class) %>%
  distinct()
#> # A tibble: 4 x 2
#>   Name      Class
#>   <chr>     <chr>
#> 1 Steve     <NA> 
#> 2 Sephiroth <NA> 
#> 3 Hero      <NA> 
#> 4 Hero      2D

Created on 2021-01-10 by the reprex package (v0.3.0.9001)

Hi thanks for the reply, when I ran the last line of script, this is what I got;

> Map %>% 
+   group_by(Name) %>%
+   inner_join(.,Data, by = "Name") %>%
+   mutate(Class = ifelse(
+     Dated %within% Span,
+     Class,
+     NA)) %>% 
+   ungroup() %>%
+   select(Name,Class) %>%
+   distinct()
# A tibble: 4 x 2
  Name      Class
  <fct>     <int>
1 Steve        NA
2 Sephiroth    NA
3 Hero         NA
4 Hero          5

I am unsure of why 5 appeared for Hero instead of 2D. A duplicate Hero shouldn't appear in the data frame as well

Try cutting and pasting:

suppressPackageStartupMessages({
  library(dplyr)
  library(lubridate)
  
})

# Variable name Date conflicts with lubridate::Date
Data <- data.frame(
  Name = c("Steve", "Sephiroth", "Hero"),
  Dated = dmy(c("03/01/2019", "01/09/2017", "06/07/2018"))
)

Map <- data.frame(
  Name = c("Steve", "Steve", "Steve", "Sephiroth", "Sephiroth", "Sephiroth", "Hero", "Hero", "Hero"),
  Class = c("1A", "2B", "3A", "1E", "2C", "3E", "1A", "2D", "3A"),
  StartDate = dmy(c("01/03/2017", "01/06/2018", "27/02/2019", "01/05/2017", "01/01/2018", "01/01/2019", "01/05/2017", "01/01/2018", "09/09/2019")),
  EndDate = dmy(c("31/12/2017", "31/10/2018", "31/09/2019", "08/06/2017", "31/12/2018", "31/12/2019", "29/11/2017", "29/08/2018", "20/11/2019")))

Map %>% 
  mutate(Span = interval(StartDate,EndDate)) %>%
  select(Name,Class,Span) -> Map

Map %>% 
  group_by(Name) %>%
  inner_join(.,Data, by = "Name") %>%
  filter(Dated %within% Span) %>%
  select(Name,Class)

Map %>% 
  group_by(Name) %>%
  inner_join(.,Data, by = "Name") %>%
  mutate(Class = ifelse(
    Dated %within% Span,
    Class,
    NA)) %>% 
  ungroup() %>%
  select(Name,Class) %>%
  distinct()

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.