alternative to multiple for loops for calculations on 2 dataframes based on conditions

Hello,
I would like to calculate nrow() in one dataframe, depending on ID number and date in another dataframe. I used multiple for()loops below, but would like to get more efficient, such as using join() or apply(). I hope someone can show me a better way.

I used 2 dataframes; bp containing data of patients who measured systolic blood pressure at home; and ap containing the date of a doctor's appointment (apart from the same patient ID's).
I would like to see how often patients measured their blood pressure before and after a doctors appointment for a particular period of time; here 2 weeks and 1 month:

library(lubridate, warn.conflicts = FALSE)

bp <- data.frame(stringsAsFactors = FALSE,
                sub = c(1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4),
                dat = c("2016-04-02 11:17:00 CEST", "2016-04-16 12:32:00 CEST","2016-05-02 12:29:00 CEST", "2016-05-03 09:07:00 CEST", "2016-05-04 16:33:00 CEST", "2016-05-06 04:59:00 CEST", "2016-05-07 20:13:00 CEST", "2016-05-09 19:12:00 CEST", "2016-05-12 12:36:00 CEST", "2016-05-24 12:10:00 CEST", "2016-05-26 15:17:00 CEST", "2016-05-28 19:02:00 CEST", "2016-05-31 06:01:00 CEST", "2016-06-03 08:20:00 CEST", "2016-06-14 12:54:00 CEST", "2016-06-18 07:32:00 CEST", "2016-06-21 05:54:00 CEST","2016-06-24 05:36:00 CEST", "2016-06-26 11:46:00 CEST", "2016-06-28 17:30:00 CEST", "2016-06-30 18:55:00 CEST", "2016-07-02 11:53:00 CEST", "2016-07-04 08:57:00 CEST", "2016-07-06 19:22:00 CEST", "2016-07-08 23:19:00 CEST", "2016-07-11 12:08:00 CEST", "2016-05-14 20:11:00 CEST", "2016-05-16 01:42:00 CEST", "2016-05-17 15:07:00 CEST", "2016-05-19 05:50:00 CEST", "2016-05-21 04:48:00 CEST", "2016-11-21 11:09:00 CET" , "2016-11-23 19:16:00 CET" , "2016-11-26 10:14:00 CET" , "2016-11-28 20:25:00 CET" , "2016-12-01 09:44:00 CET" , "2016-12-03 19:17:00 CET" , "2016-12-06 10:08:00 CET" , "2016-12-08 15:46:00 CET" , "2016-12-11 06:50:00 CET" , "2016-12-13 15:59:00 CET" , "2016-12-16 10:27:00 CET" , "2016-12-18 18:44:00 CET" , "2016-12-20 11:55:00 CET" , "2016-12-21 21:26:00 CET" , "2016-12-24 12:45:00 CET","2016-12-26 16:15:00 CET" ,"2016-12-28 18:50:00 CET" ,"2016-12-30 12:22:00 CET" ,"2017-01-01 16:06:00 CET" ,"2017-01-03 21:07:00 CET" ,"2017-01-06 06:57:00 CET" ,"2017-01-08 13:10:00 CET" ,"2017-01-10 21:33:00 CET" ,"2017-01-13 12:43:00 CET" ,"2017-01-15 19:13:00 CET" ,"2017-01-17 12:59:00 CET" ,"2017-01-19 17:01:00 CET" ,"2017-01-21 12:52:00 CET" ,"2017-01-23 19:18:00 CET" ,"2017-01-26 07:36:00 CET" ,"2017-01-28 12:40:00 CET" ,"2017-01-30 10:16:00 CET" ,"2017-02-01 07:42:00 CET" ,"2017-02-03 00:27:00 CET" ,"2017-02-04 16:05:00 CET" ,"2017-02-06 07:31:00 CET" ,"2017-02-07 21:36:00 CET" ,"2017-02-09 12:46:00 CET" ,"2017-02-10 21:32:00 CET" ,"2017-02-12 12:30:00 CET" ,"2017-02-14 10:07:00 CET" ,"2017-02-16 07:49:00 CET" ,"2017-02-17 19:27:00 CET" ,"2017-02-20 18:38:00 CET" , "2017-02-22 15:30:00 CET" , "2017-02-24 07:52:00 CET" , "2017-02-26 07:28:00 CET" , "2017-02-28 06:46:00 CET" , "2017-08-24 12:55:00 CEST", "2017-08-26 15:08:00 CEST", "2017-08-31 19:14:00 CEST", "2017-09-05 07:19:00 CEST", "2017-09-10 07:34:00 CEST", "2017-09-15 12:38:00 CEST", "2017-09-20 22:32:00 CEST", "2017-09-26 07:15:00 CEST", "2017-09-27 19:57:00 CEST", "2017-10-02 14:31:00 CEST", "2017-10-08 19:31:00 CEST", "2017-10-13 15:38:00 CEST", "2017-10-18 07:18:00 CEST", "2017-10-23 09:26:00 CEST", "2017-10-27 19:47:00 CEST", "2017-11-02 13:20:00 CET" , "2017-11-08 07:12:00 CET" , "2017-11-13 14:58:00 CET" , "2017-11-18 13:33:00 CET" , "2017-11-23 22:50:00 CET" , "2017-11-29 19:22:00 CET" , "2017-12-04 14:07:00 CET" , "2017-12-09 15:02:00 CET" , "2016-06-03 22:00:00 CEST", "2016-06-04 13:59:00 CEST", "2016-06-08 14:50:00 CEST", "2016-06-14 12:41:00 CEST", "2016-07-02 11:30:00 CEST", "2016-07-03 16:41:00 CEST", "2016-07-18 15:41:00 CEST", "2016-08-15 02:32:00 CEST","2016-09-03 15:48:00 CEST","2016-09-07 00:51:00 CEST","2016-09-29 12:15:00 CEST"),
                sys = c(111,144,125,125,130,137,135,137,133,128,132,139,127,127,130,130,127,126,132,130,131,134,124,127,128,131,140,129,137,134,131,122,119,122,123,130,126,122,123,126,122,126,127,125,124,127,127,127,128,127,122,126,127,123,127,125,123,123,123,123,123,123,123,122,127,125,123,123,123,126,124,123,123,125,123,125,122,122,125,129,128,129,127,130,126,136,128,129,133,132,129,128,128,127,125,131,132,127,128,128,127,127,130,129,131,130,137,132,130,127,126,127,126)
                )
bp$dat <- as.POSIXct(bp$dat)
                
ap <- data.frame(stringsAsFactors = FALSE,                
                  sub = c(1, 2, 2, 3, 3, 3, 4, 4),              
                  dat = c("2016-05-02", "2017-01-16", "2017-01-16","2017-09-26", "2017-09-26", "2017-09-26", "2016-07-02", "2016-09-02")
                  )
ap$dat <- as.Date(ap$dat, format ="%Y-%m-%d") 
  
ap$pre.n14d <- 2
for (i in 1:nrow(ap)){
  pre.n14d <- nrow(subset(bp, sub == ap$sub[i] & dat < ap$dat[i] & dat > ap$dat[i]- days(16)))
  ap$pre.n14d[[i]] <- pre.n14d
}

ap$post.n14d <- 2
for (i in 1:nrow(ap)){
  post.n14d <- nrow(subset(bp, sub == ap$sub[i] & dat> ap$dat[i] & dat < ap$dat[i]+ days(16)))
  ap$post.n14d[[i]] <- post.n14d
}

ap$pre.n30d <- 2
for (i in 1:nrow(ap)){
  pre.n30d <- nrow(subset(bp, sub == ap$sub[i] & dat < ap$dat[i] & dat > ap$dat[i]- days(31)))
  ap$pre.n30d[[i]] <- pre.n30d
}

ap$post.n30d <- 2
for (i in 1:nrow(ap)){
  post.n30d <- nrow(subset(bp, sub == ap$sub[i] & dat> ap$dat[i] & dat < ap$dat[i]+ days(31)))
  ap$post.n30d[[i]] <- post.n30d
}

ap
#>   sub        dat pre.n14d post.n14d pre.n30d post.n30d
#> 1   1 2016-05-02        1        10        2        16
#> 2   2 2017-01-16        7         7       15        16
#> 3   2 2017-01-16        7         7       15        16
#> 4   3 2017-09-26        3         4        6         7
#> 5   3 2017-09-26        3         4        6         7
#> 6   3 2017-09-26        3         4        6         7
#> 7   4 2016-07-02        0         2        4         3
#> 8   4 2016-09-02        0         2        1         3

Created on 2020-05-25 by the reprex package (v0.3.0)

Maybe something like this?

library(tidyverse)
library(lubridate, warn.conflicts = FALSE)

# Code to generate data frames excluded.

ap %>% 
  distinct() %>% 
  left_join(bp, by = "sub", suffix = c("_ap", "_bp")) %>% 
  group_by(sub, dat_ap) %>% 
  mutate(pre.n14d = dat_bp %within% interval(dat_ap - weeks(2), dat_ap),
         post.n14d = dat_bp %within% interval(dat_ap, dat_ap + weeks(2)),
         pre.n30d = dat_bp %within% interval(dat_ap - months(1), dat_ap),
         post.n30d = dat_bp %within% interval(dat_ap, dat_ap + months(1))) %>% 
  summarise_at(vars(matches("n\\d{2}d")), sum) %>% 
  ungroup()
#> # A tibble: 5 x 6
#>     sub dat_ap     pre.n14d post.n14d pre.n30d post.n30d
#>   <dbl> <date>        <int>     <int>    <int>     <int>
#> 1     1 2016-05-02        0         9        2        16
#> 2     2 2017-01-16        6         6       15        16
#> 3     3 2017-09-26        2         4        6         7
#> 4     4 2016-07-02        0         2        4         3
#> 5     4 2016-09-02        0         2        1         3

Created on 2020-05-26 by the reprex package (v0.3.0)

1 Like

This topic was automatically closed 7 days after the last reply. New replies are no longer allowed.

n is matching the first letter of the variable after .. You can remove it since the pattern \\d{2}d will also match the required columns.

The link you've posted is an excellent resource for learning the basics of R.

I learned several new terms, thank you again! :blush:
I'm new to regexps, trying to decipher "n\\d{2}d". I'm not sure what n means in this case.
I could figure out
\\d : any digit,
{2} exactly 2 (digits), and
d the last letter of the variable.
I found the following website helpful: https://r4ds.had.co.nz/strings.html