Time-varying variables

Hello,
I am trying to create time-dependent variables, but I am struggling with the code to structure them.
As you can see in the reproducible example, I have several bills in four countries, but each of them has a different length - because of the legislative procedures. Therefore, some start and finish in the same year, while others continue for a long period of time.
For the GDP growth, I have yearly measurements and I would like to specify it for each bill/country/year.

mydata<-data.frame(
  stringsAsFactors = FALSE,
              bill = c("Protocolo de Asuncion para Derechos Humanos",
                       "Protocolo de Asuncion para Derechos Humanos","Protocolo de Asuncion para Derechos Humanos",
                       "Protocolo de Asuncion para Derechos Humanos"),
           country = c("Argentina", "Brazil", "Paraguay", "Uruguay"),
          dateentr = c("2006-03-22", "2006-10-17", "2006-03-29", "2006-06-09"),
          dateappr = c("2006-06-07", "2009-08-25", "2006-09-26", "2008-05-07"),
             cause = c(1L, 1L, 1L, 1L)
)

gdpgrowth_long<-data.frame(
  stringsAsFactors = FALSE,
              year = c("2006","2006","2006","2006",
                       "2006","2006","2007","2007","2007","2007","2007",
                       "2007","2008","2008","2008","2008","2008","2008",
                       "2009","2009","2009","2009","2009","2009"),
           country = c("Argentina","Brazil",
                       "Paraguay","Uruguay","...6","...7","Argentina","Brazil",
                       "Paraguay","Uruguay","...6","...7","Argentina","Brazil",
                       "Paraguay","Uruguay","...6","...7","Argentina",
                       "Brazil","Paraguay","Uruguay","...6","...7"),
         gdpgrowth = c(8.04715150365762,
                       3.96198872336626,4.80711719270674,4.09857734684473,NA,NA,
                       9.00765087916513,6.06987060717197,5.42162287220238,
                       6.5415108494737,NA,NA,4.05723310237891,5.09419544721958,
                       6.35912079324652,7.17614466103376,NA,NA,-5.91852508035113,
                       -0.125811999603498,-0.257971433717119,4.24349419543073,
                       NA,NA)
)


I appreciate your help!

I don't know if I've understood correctly, it seems that you want to match yearly gdp growth to each country during each year of the bill that take effect.
So, the key point is to retrieve the years when these bills came in to active. It is not a difficult work for lubridate package. Try below code, I took benefit from tidyr's nest data feature to simplify the code (see instructions at lubridate homepage and tidyr website).

library(tidyverse)

mydata_1 <- mydata %>% rowwise %>% 
  mutate(year = list(c(year(as_date(dateentr)):year(as_date(dateappr)))),
         year = list(as.character(year))) %>% 
  unnest_longer(year)

left_join(
  mydata_1,
  gdpgrowth_long,
  by = c("year","country")
)

you'll get

# A tibble: 9 × 8
  bill                                        country   dateentr   dateappr   cause nyear year  gdpgrowth
  <chr>                                       <chr>     <chr>      <chr>      <int> <dbl> <chr>     <dbl>
1 Protocolo de Asuncion para Derechos Humanos Argentina 2006-03-22 2006-06-07     1     0 2006      8.05 
2 Protocolo de Asuncion para Derechos Humanos Brazil    2006-10-17 2009-08-25     1     3 2006      3.96 
3 Protocolo de Asuncion para Derechos Humanos Brazil    2006-10-17 2009-08-25     1     3 2007      6.07 
4 Protocolo de Asuncion para Derechos Humanos Brazil    2006-10-17 2009-08-25     1     3 2008      5.09 
5 Protocolo de Asuncion para Derechos Humanos Brazil    2006-10-17 2009-08-25     1     3 2009     -0.126
6 Protocolo de Asuncion para Derechos Humanos Paraguay  2006-03-29 2006-09-26     1     0 2006      4.81 
7 Protocolo de Asuncion para Derechos Humanos Uruguay   2006-06-09 2008-05-07     1     2 2006      4.10 
8 Protocolo de Asuncion para Derechos Humanos Uruguay   2006-06-09 2008-05-07     1     2 2007      6.54 
9 Protocolo de Asuncion para Derechos Humanos Uruguay   2006-06-09 2008-05-07     1     2 2008      7.18 
1 Like
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union

# bills

bills <- data.frame(
  stringsAsFactors = FALSE,
  bill = c(
    "Protocolo de Asuncion para Derechos Humanos",
    "Protocolo de Asuncion para Derechos Humanos", "Protocolo de Asuncion para Derechos Humanos",
    "Protocolo de Asuncion para Derechos Humanos"
  ),
  country = c("Argentina", "Brazil", "Paraguay", "Uruguay"),
  dateentr = c("2006-03-22", "2006-10-17", "2006-03-29", "2006-06-09"),
  dateappr = c("2006-06-07", "2009-08-25", "2006-09-26", "2008-05-07"),
  cause = c(1L, 1L, 1L, 1L)
)

# trim unneeded variables
bills <- bills[-c(1,5)]

# economic growth
gdp <- data.frame(
  stringsAsFactors = FALSE,
  year = c(
    "2006", "2006", "2006", "2006",
    "2006", "2006", "2007", "2007", "2007", "2007", "2007",
    "2007", "2008", "2008", "2008", "2008", "2008", "2008",
    "2009", "2009", "2009", "2009", "2009", "2009"
  ),
  country = c(
    "Argentina", "Brazil",
    "Paraguay", "Uruguay", "...6", "...7", "Argentina", "Brazil",
    "Paraguay", "Uruguay", "...6", "...7", "Argentina", "Brazil",
    "Paraguay", "Uruguay", "...6", "...7", "Argentina",
    "Brazil", "Paraguay", "Uruguay", "...6", "...7"
  ),
  gdpgrowth = c(
    8.04715150365762,
    3.96198872336626, 4.80711719270674, 4.09857734684473, NA, NA,
    9.00765087916513, 6.06987060717197, 5.42162287220238,
    6.5415108494737, NA, NA, 4.05723310237891, 5.09419544721958,
    6.35912079324652, 7.17614466103376, NA, NA, -5.91852508035113,
    -0.125811999603498, -0.257971433717119, 4.24349419543073,
    NA, NA
  )
)

# trim unused rows

gdp <- gdp[complete.cases(gdp),]

# convert dates to date objects

bills$dateappr <- ymd(bills$dateappr)
bills$dateentr <- ymd(bills$dateentr)

# convert character year to numeric

gdp$year <- as.integer(gdp$year)

# calculate start/stop

bills$duration <- bills$dateappr - bills$dateentr

# test if dateentr and dateappr in same year
same_year <- function() year(bills$dateappr) == year(bills$dateentr)

# case 1: bill in same year
case1 <- bills[same_year(),]
case1$year <- year(case1$dateappr)
case1 <- left_join(case1,left_join(case1,gdp))
#> Joining with `by = join_by(country, year)`
#> Joining with `by = join_by(country, dateentr, dateappr, duration, year)`
case1 <- case1 |> group_by(country) |> summarise(avg = mean(gdpgrowth))

# bill spans more than one year

case2 <- bills[!same_year(),]
case2 <- left_join(case2,left_join(case2,gdp))
#> Joining with `by = join_by(country)`
#> Joining with `by = join_by(country, dateentr, dateappr, duration)`
case2 <- case2[-c(2,3)]
case2 <- case2 |> group_by(country) |> summarise(avg = mean(gdpgrowth))

result <- rows_append(case1,case2) |> arrange(-desc(country))
result
#> # A tibble: 4 × 2
#>   country     avg
#>   <chr>     <dbl>
#> 1 Argentina  8.05
#> 2 Brazil     3.75
#> 3 Paraguay   4.81
#> 4 Uruguay    5.51

Created on 2023-06-04 with reprex v2.0.2

Thank you! It is working now

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.