Finding all possible combinations for these set conditions

ID Value Time
1 0.3 1/3/19 16:00
1 0.2 1/3/19 18:00
1 0.6 1/4/19 12:00
1 0.4 1/4/19 20:00
2 0.1 1/3/19 13:00
2 0.2 1/4/19 19:00
2 0.2 1/5/19 07:00
2 0.4 1/5/19 11:00

I am a beginner in R and am stuck on how to accomplish the following task:

I want to know, for every unique ID, is there an increase of "Value" greater or equal to >=0.3 WITHIN 48 hours.

So essentially, the following checks need to be done

For every ID, if Time2-Time1 <= 48 hrs, check if there is an instance of the value increasing by >=0.3.

The problem is, the value increasing by >=0.3 doesn't always happen right next to each other (for example for ID 2, the value does increase by 0.3 within 48 hours, but it isn't subsequent)

Any help would be appreciated.

Hi,

# set min reprex
df = as.data.frame(matrix(c(1,0.3,"1/3/19 16:00",
                            1,0.2,"1/3/19 18:00",
                            1,0.6,"1/4/19 12:00",
                            1,0.4,"1/4/19 20:00",
                            2,0.1,"1/3/19 13:00",
                            2,0.2,"1/4/19 19:00",
                            2,0.2,"1/5/19 07:00",
                            2,0.4,"1/5/19 11:00"), ncol = 3, byrow = TRUE))
colnames(df) = c("ID", "Value", "Time")

# convert to time
df$Time2  = as.POSIXct(df$Time, format = "%d/%m/%Y %H:%M")

# set min diff time to take into account
x = 48
attr(x, "units") = "hours"

by(df, df$ID, FUN = function(d) {
  c(NA, (d$Value[-1] >= 0.3) & (diff(d$Time2) >= x))
})
1 Like

The solution by @gitdemont does not satisfy
The problem is, the value increasing by >=0.3 doesn't always happen right next to each other (for example for ID 2, the value does increase by 0.3 within 48 hours, but it isn't subsequent)

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(tibble)
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union

df <- tibble::tribble(
  ~ID,~Value,~Time,
1, 0.3, "1/3/19 16:00",
1, 0.2, "1/3/19 18:00",
1, 0.6, "1/4/19 12:00",
1, 0.4, "1/4/19 20:00",
2, 0.1, "1/3/19 13:00",
2, 0.2, "1/4/19 19:00",
2, 0.2, "1/5/19 07:00",
2, 0.4, "1/5/19 11:00"
)

df2 <- df %>%
  mutate(
         nr = row_number(),  
         Time=lubridate::parse_date_time(Time,"%m/%d%y %H:%M")
         )

as.data.frame(df2)
#>   ID Value                Time nr
#> 1  1   0.3 2019-01-03 16:00:00  1
#> 2  1   0.2 2019-01-03 18:00:00  2
#> 3  1   0.6 2019-01-04 12:00:00  3
#> 4  1   0.4 2019-01-04 20:00:00  4
#> 5  2   0.1 2019-01-03 13:00:00  5
#> 6  2   0.2 2019-01-04 19:00:00  6
#> 7  2   0.2 2019-01-05 07:00:00  7
#> 8  2   0.4 2019-01-05 11:00:00  8

df3 <- df2 %>%
  inner_join(df2,by=c('ID'='ID')) %>% 
  mutate(diffTime=lubridate::make_difftime(Time.y - Time.x,units="hours"),
         diffValue = Value.y - Value.x
         ) %>%
  filter((diffTime > 0) & (diffTime <= 48 ) & diffValue >= 0.3)

as.data.frame(df3)
#>   ID Value.x              Time.x nr.x Value.y              Time.y nr.y diffTime
#> 1  1     0.3 2019-01-03 16:00:00    1     0.6 2019-01-04 12:00:00    3 20 hours
#> 2  1     0.2 2019-01-03 18:00:00    2     0.6 2019-01-04 12:00:00    3 18 hours
#> 3  2     0.1 2019-01-03 13:00:00    5     0.4 2019-01-05 11:00:00    8 46 hours
#>   diffValue
#> 1       0.3
#> 2       0.4
#> 3       0.3
Created on 2021-07-26 by the reprex package (v2.0.0)

You are right indeed @HanOostdijk, I was totally wrong !
Besides date conversion was also wrong in my solution

Maybe this one is better

# set min reprex
df = as.data.frame(matrix(c(1,0.3,"1/3/19 16:00",
                            1,0.2,"1/3/19 18:00",
                            1,0.6,"1/4/19 12:00",
                            1,0.4,"1/4/19 20:00",
                            2,0.1,"1/3/19 13:00",
                            2,0.2,"1/4/19 19:00",
                            2,0.2,"1/5/19 07:00",
                            2,0.4,"1/5/19 11:00"), ncol = 3, byrow = TRUE))
colnames(df) = c("ID", "Value", "Time")

# convert to time
df$Time2  = as.POSIXct(df$Time, format = "%m/%d/%y %H:%M")

df

by(df, df$ID, FUN = function(d) {
  if(nrow(d) == 1) return(NULL)
  apply(combn(1:nrow(d), 2), 2, FUN = function(i_comb) {
      if(diff(as.numeric(d[i_comb, "Value"])) >= 0.3 & difftime(df[i_comb[2], "Time2"], df[i_comb[1], "Time2"], units = "hours") <= 48)
        return(d[i_comb,])
    })
})

EDIT: sorry for the multiple editions

and another one to remove NULL

by(df, df$ID, FUN = function(d) {
  if(nrow(d) == 1) return(NULL)
  Filter(Negate(is.null), apply(combn(1:nrow(d), 2), 2, FUN = function(i_comb) {
      if(diff(as.numeric(d[i_comb, "Value"])) >= 0.3 & difftime(df[i_comb[2], "Time2"], df[i_comb[1], "Time2"], units = "hours") <= 48)
        return(d[i_comb,])
    }))
})

Thank you both so much for the help

This topic was automatically closed 21 days after the last reply. New replies are no longer allowed.

If you have a query related to it or one of the replies, start a new topic and refer back with a link.