Onset variable based on the value of previous year

In my dataset, I have the following variables:

  • gid = cell identifier
  • Year
  • Battles: count of battles per year
  • IncidenceBattles: 1 if at least one battle happened that year in that cell, 0 otherwise.

For the construction of the incidence variable, I have used the following code: Test$IncidenceBattles <- ifelse(Test$Battles >= 1, 1, 0)

I would like to create a binary variable OnsetBattle that equals 1 if we observe at least 1 conflict in a particular year but none in the preceding year.

For example, in cell 146572, OnsetBattle should equal 1 in 2007 if a conflict happened in 2007 but not in the previous year (2006); otherwise it should equal 0.

 # Sample data as printed by dput(): a grouped tibble (class "grouped_df")
 # with 61 rows and the columns gid, Year, Battles and IncidenceBattles.
 # The "groups" attribute lists the 29 distinct gid values together with the
 # row indices belonging to each. Rows are not sorted by Year within a gid,
 # and some rows have NA for Year, Battles and IncidenceBattles.
 structure(list(gid = c(146572, 146572, 146572, 146572, 146572, 
                                146573, 146574, 146574, 146574, 146574, 146574, 146574, 146575, 
                                146576, 146577, 146577, 146578, 146579, 146580, 146581, 146582, 
                                146583, 146583, 146583, 146583, 146583, 146583, 146583, 146583, 
                                146583, 146583, 146584, 146585, 146586, 146586, 146587, 146588, 
                                146589, 146589, 146589, 146589, 146589, 146589, 146589, 146589, 
                                146590, 146591, 146591, 146592, 146593, 146594, 146595, 146595, 
                                146595, 146596, 146597, 146598, 146598, 146599, 146600, 146600
      ), Year = c(2006, 2005, 2008, 2007, 2009, NA, 2005, 2003, 2007, 
                  2004, 2006, 2008, 2010, 2008, 2004, 2003, NA, 1997, 2008, NA, 
                  1997, 1997, 2009, 2003, 1998, 2000, 2002, 2001, 2008, 1999, 2010, 
                  2004, NA, 1998, 2002, NA, NA, 1997, 2000, 2002, 2009, 1998, 2001, 
                  1999, 2003, NA, 1997, 2002, NA, NA, NA, 2002, 2006, 2005, NA, 
                  2004, 2004, 2010, NA, 2006, 2000), Battles = c(0, 1, 1, 3, 5, 
                                                                 NA, 1, 0, 4, 0, 0, 0, 1, 1, 1, 1, NA, 1, 2, NA, 1, 2, 2, 0, 28, 
                                                                 3, 1, 11, 0, 4, 0, 0, NA, 0, 1, NA, NA, 13, 8, 2, 0, 12, 10, 
                                                                 7, 0, NA, 1, 2, NA, NA, NA, 1, 0, 0, NA, 1, 1, 1, NA, 4, 12), 
      IncidenceBattles = c(0, 1, 1, 1, 1, NA, 1, 0, 1, 0, 0, 0, 
                           1, 1, 1, 1, NA, 1, 1, NA, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 
                           0, NA, 0, 1, NA, NA, 1, 1, 1, 0, 1, 1, 1, 0, NA, 1, 1, NA, 
                           NA, NA, 1, 0, 0, NA, 1, 1, 1, NA, 1, 1)), class = c("grouped_df", 
                                                                               "tbl_df", "tbl", "data.frame"), row.names = c(NA, -61L), groups = structure(list(
                                                                                 gid = c(146572, 146573, 146574, 146575, 146576, 146577, 146578, 
                                                                                         146579, 146580, 146581, 146582, 146583, 146584, 146585, 146586, 
                                                                                         146587, 146588, 146589, 146590, 146591, 146592, 146593, 146594, 
                                                                                         146595, 146596, 146597, 146598, 146599, 146600), .rows = structure(list(
                                                                                           1:5, 6L, 7:12, 13L, 14L, 15:16, 17L, 18L, 19L, 20L, 21L, 
                                                                                           22:31, 32L, 33L, 34:35, 36L, 37L, 38:45, 46L, 47:48, 
                                                                                           49L, 50L, 51L, 52:54, 55L, 56L, 57:58, 59L, 60:61), ptype = integer(0), class = c("vctrs_list_of", 
                                                                                                                                                                             "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
                                                                                                                                                                             ), row.names = c(NA, -29L), .drop = TRUE))

Hi @Mtrs
Is this what you want?

suppressPackageStartupMessages(library(tidyverse))

# Sample data rebuilt from dput() output: a grouped tibble (class
# "grouped_df") with 61 rows and the columns gid, Year, Battles and
# IncidenceBattles. Rows are not sorted by Year within a gid, and some rows
# have NA for Year, Battles and IncidenceBattles.
df <- structure(list(gid = c(146572, 146572, 146572, 146572, 146572, 
                       146573, 146574, 146574, 146574, 146574, 146574, 146574, 146575, 
                       146576, 146577, 146577, 146578, 146579, 146580, 146581, 146582, 
                       146583, 146583, 146583, 146583, 146583, 146583, 146583, 146583, 
                       146583, 146583, 146584, 146585, 146586, 146586, 146587, 146588, 
                       146589, 146589, 146589, 146589, 146589, 146589, 146589, 146589, 
                       146590, 146591, 146591, 146592, 146593, 146594, 146595, 146595, 
                       146595, 146596, 146597, 146598, 146598, 146599, 146600, 146600
      ), Year = c(2006, 2005, 2008, 2007, 2009, NA, 2005, 2003, 2007, 
                  2004, 2006, 2008, 2010, 2008, 2004, 2003, NA, 1997, 2008, NA, 
                  1997, 1997, 2009, 2003, 1998, 2000, 2002, 2001, 2008, 1999, 2010, 
                  2004, NA, 1998, 2002, NA, NA, 1997, 2000, 2002, 2009, 1998, 2001, 
                  1999, 2003, NA, 1997, 2002, NA, NA, NA, 2002, 2006, 2005, NA, 
                  2004, 2004, 2010, NA, 2006, 2000), 
         Battles = c(0, 1, 1, 3, 5, 
                     NA, 1, 0, 4, 0, 0, 0, 1, 1, 1, 1, NA, 1, 2, NA, 1, 2, 2, 0, 28, 
                     3, 1, 11, 0, 4, 0, 0, NA, 0, 1, NA, NA, 13, 8, 2, 0, 12, 10, 
                     7, 0, NA, 1, 2, NA, NA, NA, 1, 0, 0, NA, 1, 1, 1, NA, 4, 12), 
      IncidenceBattles = c(0, 1, 1, 1, 1, NA, 1, 0, 1, 0, 0, 0, 
                           1, 1, 1, 1, NA, 1, 1, NA, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 
                           0, NA, 0, 1, NA, NA, 1, 1, 1, 0, 1, 1, 1, 0, NA, 1, 1, NA, 
                           NA, NA, 1, 0, 0, NA, 1, 1, 1, NA, 1, 1)), 
      class = c("grouped_df", 
                 "tbl_df", "tbl", "data.frame"), row.names = c(NA, -61L), 
      # Grouping metadata: the 29 distinct gid values and, in .rows, the row
      # indices of the data that belong to each of them.
      groups = structure(list(
                   gid = c(146572, 146573, 146574, 146575, 146576, 146577, 146578, 
                           146579, 146580, 146581, 146582, 146583, 146584, 146585, 146586, 
                           146587, 146588, 146589, 146590, 146591, 146592, 146593, 146594, 
                           146595, 146596, 146597, 146598, 146599, 146600), 
                   .rows = structure(list(
                             1:5, 6L, 7:12, 13L, 14L, 15:16, 17L, 18L, 19L, 20L, 21L, 
                             22:31, 32L, 33L, 34:35, 36L, 37L, 38:45, 46L, 47:48, 
                             49L, 50L, 51L, 52:54, 55L, 56L, 57:58, 59L, 60:61), 
                             ptype = integer(0),
                             class = c("vctrs_list_of", 
                                       "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
                                       ), row.names = c(NA, -29L), .drop = TRUE))

# Flag conflict onset per cell: OnsetBattle is 1 when a cell-year has at least
# one battle (IncidenceBattles == 1) and the same cell had zero battles in the
# immediately preceding calendar year; otherwise it is 0, or NA when that
# cannot be determined.
#
# The data is re-sorted by gid and Year first because lag() works on row
# order, and the rows of `df` are not ordered by Year within a gid.
# The result is still grouped by gid; call ungroup() before doing any further
# work that should not be per-cell.
df %>%
  ungroup() %>%
  arrange(gid, Year) %>%
  group_by(gid) %>%
  mutate(
    # lag() returns the previous *row* of the cell, which is only the previous
    # *year* when the years are consecutive. Where the cell has a gap in its
    # years (or Year is NA, or this is the cell's first row) the preceding
    # year is unobserved, so lag_battles is set to NA instead of borrowing the
    # battle count from some earlier year.
    lag_battles = if_else(lag(Year) == Year - 1, lag(Battles), NA_real_),
    # TRUE & NA is NA while FALSE & NA is FALSE, so a year with battles but an
    # unknown previous year yields NA, and a year without battles yields 0.
    OnsetBattle = if_else(IncidenceBattles == 1 & lag_battles == 0, 1, 0)
  )
#> # A tibble: 61 × 6
#> # Groups:   gid [29]
#>       gid  Year Battles IncidenceBattles lag_battles OnsetBattle
#>     <dbl> <dbl>   <dbl>            <dbl>       <dbl>       <dbl>
#>  1 146572  2005       1                1          NA          NA
#>  2 146572  2006       0                0           1           0
#>  3 146572  2007       3                1           0           1
#>  4 146572  2008       1                1           3           0
#>  5 146572  2009       5                1           1           0
#>  6 146573    NA      NA               NA          NA          NA
#>  7 146574  2003       0                0          NA           0
#>  8 146574  2004       0                0           0           0
#>  9 146574  2005       1                1           0           1
#> 10 146574  2006       0                0           1           0
#> # … with 51 more rows

Created on 2022-10-27 with reprex v2.0.2

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.