Count number of events in the previous time period

I trying to create a variable (the made up one "events60" in the data below, that keeps a running count of the number of events in the past (in this example it's 60 minutes, but it could be any arbitrary value).

I'm making slow headway with cumsum, rle, diff etc. and whatnot but I'm certain there is a more elegant and quicker solution. It will be applied to a dataset of a minimum 30 million rows.

structure(list(Performed_DT_TM = structure(c(1508310211, 1508312843,
1508322697, 1508331061, 1508331161, 1508331452, 1508332222, 1508332900,
1508333781, 1508334349, 1508337531, 1508341065, 1508343542, 1508346756,
1508363905, 1508371639, 1508388245, 1508402001, 1508413612, 1508430173,
1508445426, 1508453675), class = c("POSIXct", "POSIXt"), tzone = ""),
time_since_prev_obs = c(0, 43.8666666666667, 164.233333333333,
139.4, 1.66666666666667, 4.85, 12.8333333333333, 11.3, 14.6833333333333,
9.46666666666667, 53.0333333333333, 58.9, 41.2833333333333,
53.5666666666667, 285.816666666667, 128.9, 276.766666666667,
229.266666666667, 193.516666666667, 276.016666666667, 254.216666666667,
137.483333333333), events60 = c(0, 1, 0, 0, 1, 2, 3, 4, 5,
6, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0)), row.names = c(NA,
-22L), class = "data.frame")

Any help greatly appreciated of course

Cheers
Norm

Can you please clarify what exactly the events60 variable should contain? Do you want to count the number of preceding observations that have timestamps falling in the past 60 minutes?

Yes, exactly. My goal is to create a model that has this event60 dummy variable that keeps track of how many events occurred in the immediate prior time period (E.g. 60 minutes) as a predictor of sone other outcome.
Does that make better sense?

Anyone?
It's doing my head in

This is what I came up with.

library(tidyverse)
library(lubridate)


input_df <- structure(list(
  Performed_DT_TM = structure(c(
    1508310211, 1508312843,
    1508322697, 1508331061, 1508331161, 1508331452, 1508332222, 1508332900,
    1508333781, 1508334349, 1508337531, 1508341065, 1508343542, 1508346756,
    1508363905, 1508371639, 1508388245, 1508402001, 1508413612, 1508430173,
    1508445426, 1508453675
  ), class = c("POSIXct", "POSIXt"), tzone = ""),
  time_since_prev_obs = c(
    0, 43.8666666666667, 164.233333333333,
    139.4, 1.66666666666667, 4.85, 12.8333333333333, 11.3, 14.6833333333333,
    9.46666666666667, 53.0333333333333, 58.9, 41.2833333333333,
    53.5666666666667, 285.816666666667, 128.9, 276.766666666667,
    229.266666666667, 193.516666666667, 276.016666666667, 254.216666666667,
    137.483333333333
  )
), row.names = c(
  NA,
  -22L
), class = "data.frame")



new_df <- input_df
new_df$expire_list <- list(NA)

for (i in 1:nrow(new_df))
{
  # at each new event we add an event to our expire list
  # then we check if any events in said list are older than 60 away from the current, and if so, delete them.
  if (i == 1) {
    len_prev_list <- 0
    new_df$expire_list[[i]] <- list(new_df$Performed_DT_TM[[i]] + minutes(60))
  } else {
    len_prev_list <- length(new_df$expire_list[[i - 1]])
    new_df$expire_list[[i]] <- new_df$expire_list[[i - 1]]
    new_df$expire_list[[i]][[len_prev_list + 1]] <- new_df$Performed_DT_TM[[i]] + minutes(60)
  }

  ## going in reverse order so safe to delete
  for (j in (len_prev_list + 1):1) {
    if ((new_df$Performed_DT_TM[[i]] - new_df$expire_list[[i]][[j]]) > minutes(0)
    ) {
      new_df$expire_list[[i]][[j]] <- NA
      new_df$expire_list[[i]] <- new_df$expire_list[[i]][!is.na(new_df$expire_list[[i]])]
    }
  }
}


new_df$events_60 <- lengths(new_df$expire_list)
new_df %>% select(-expire_list)
1 Like

Thanks for that, I'm just not sure it will scale up to millions of records.

But I did get a nice data.table solution form StackOverflow

setDT(DT)[, Performed_DT_TM := as.POSIXct(Performed_DT_TM, format="%Y-%-%d %T")]
setkey(DT,PatientId,Performed_DT_TM)

DT[, endtime := Performed_DT_TM - 60L*60L]
DT[, Last60 := 
     DT[DT, on=.(Performed_DT_TM>=endtime, Performed_DT_TM<Performed_DT_TM), .N, by=.EACHI]$N
   ]

Cheers
Norm

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.