Hi @frederikke. You can combine `rle` and `diff` to find runs of consecutive months. `diff` returns the lagged differences between successive values, and `rle` counts the length of each run of identical differences (for consecutive months the lagged difference is 1). A run of k differences equal to 1 therefore corresponds to k + 1 consecutive months.
library(tidyverse)
# Build a toy data set: one row per (person, month) pair ----
numPpl <- 100
ids <- sample(1000000:9999999, numPpl)
df <- map_dfr(ids, function(id) {
  # Each person is observed in a random number (1-24) of distinct months,
  # stored in ascending order.
  n_obs <- sample(1:24, 1)
  observed <- sort(sample(1:24, n_obs))
  # data.frame() rewrites the non-syntactic name `ID-number` to `ID.number`
  # (check.names = TRUE); that is the column name used further down.
  data.frame(`ID-number` = id, Month = observed, stringsAsFactors = FALSE)
})
######################################
# Longest streak of consecutive months per person ----
# diff() of the sorted months is 1 wherever two observed months are adjacent,
# and rle() measures how long each stretch of identical differences is, so a
# run of k ones corresponds to k + 1 consecutive months.
# The result is still a rowwise data frame; call ungroup() before reusing it.
df %>%
  group_by(ID.number) %>%
  # sort(unique()) makes the result independent of row order and of repeated
  # months within a person; sort() also drops NA months.
  summarise(rle = list(rle(diff(sort(unique(Month)))))) %>%
  rowwise() %>%
  mutate(
    # The -1 fallback becomes 0 after the + 1, i.e. "no two adjacent months".
    consecutiveMonth = max(rle$lengths[rle$values == 1], -1) + 1,
    # 1 when the longest streak covers at least 12 months, otherwise 0.
    moreThan12Month = ifelse(consecutiveMonth >= 12, 1, 0)
  )
#> Source: local data frame [100 x 4]
#> Groups: <by row>
#>
#> # A tibble: 100 x 4
#> ID.number rle consecutiveMonth moreThan12Month
#> <int> <list> <dbl> <dbl>
#> 1 1110006 <rle> 6 0
#> 2 1148835 <rle> 2 0
#> 3 1170768 <rle> 4 0
#> 4 1213171 <rle> 10 0
#> 5 1285829 <rle> 15 1
#> 6 1329848 <rle> 3 0
#> 7 1600573 <rle> 4 0
#> 8 1651562 <rle> 3 0
#> 9 1779561 <rle> 17 1
#> 10 1818885 <rle> 6 0
#> # … with 90 more rows
Created on 2019-10-22 by the reprex package (v0.3.0)